From cce86c9b400b8eb5fddbbc4f7dc83e9e68ec009e Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 11 Nov 2025 18:30:36 -0500 Subject: [PATCH 1/6] Remove the Arch trait It wasn't operating at the correct level of abstraction. --- fearless_simd_gen/src/arch/fallback.rs | 7 +++-- fearless_simd_gen/src/arch/mod.rs | 9 ------- fearless_simd_gen/src/arch/neon.rs | 7 +++-- fearless_simd_gen/src/arch/wasm.rs | 7 +++-- fearless_simd_gen/src/arch/x86.rs | 7 +++-- fearless_simd_gen/src/mk_avx2.rs | 20 +++++--------- fearless_simd_gen/src/mk_fallback.rs | 3 +-- fearless_simd_gen/src/mk_neon.rs | 1 - fearless_simd_gen/src/mk_sse4_2.rs | 36 ++++++++------------------ fearless_simd_gen/src/mk_wasm.rs | 2 +- 10 files changed, 31 insertions(+), 68 deletions(-) diff --git a/fearless_simd_gen/src/arch/fallback.rs b/fearless_simd_gen/src/arch/fallback.rs index a71f144d..e976f39b 100644 --- a/fearless_simd_gen/src/arch/fallback.rs +++ b/fearless_simd_gen/src/arch/fallback.rs @@ -7,7 +7,6 @@ reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] -use crate::arch::Arch; use crate::types::{ScalarType, VecType}; use proc_macro2::{Ident, Span, TokenStream}; use quote::quote; @@ -73,8 +72,8 @@ pub fn simple_intrinsic(name: &str, ty: &VecType) -> TokenStream { pub struct Fallback; -impl Arch for Fallback { - fn arch_ty(&self, ty: &VecType) -> TokenStream { +impl Fallback { + pub(crate) fn arch_ty(&self, ty: &VecType) -> TokenStream { let scalar = match ty.scalar { ScalarType::Float => "f", ScalarType::Unsigned => "u", @@ -85,7 +84,7 @@ impl Arch for Fallback { quote! { #ident } } - fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { + pub(crate) fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { if let Some(translated) = translate_op(op, ty.scalar == ScalarType::Float) { let intrinsic = simple_intrinsic(translated, ty); quote! { #intrinsic ( #( #args ),* ) } diff --git a/fearless_simd_gen/src/arch/mod.rs b/fearless_simd_gen/src/arch/mod.rs index a1d2764b..ad7716f0 100644 --- a/fearless_simd_gen/src/arch/mod.rs +++ b/fearless_simd_gen/src/arch/mod.rs @@ -5,12 +5,3 @@ pub(crate) mod fallback; pub(crate) mod neon; pub(crate) mod wasm; pub(crate) mod x86; - -use proc_macro2::TokenStream; - -use crate::types::VecType; - -pub(crate) trait Arch { - fn arch_ty(&self, ty: &VecType) -> TokenStream; - fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream; -} diff --git a/fearless_simd_gen/src/arch/neon.rs b/fearless_simd_gen/src/arch/neon.rs index 24f9a833..0a55550a 100644 --- a/fearless_simd_gen/src/arch/neon.rs +++ b/fearless_simd_gen/src/arch/neon.rs @@ -6,7 +6,6 @@ reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] -use crate::arch::Arch; use crate::types::{ScalarType, VecType}; use proc_macro2::{Ident, Span, TokenStream}; use quote::quote; @@ -46,8 +45,8 @@ fn translate_op(op: &str) -> Option<&'static str> { }) } -impl Arch for Neon { - fn arch_ty(&self, ty: &VecType) -> TokenStream { +impl Neon { + pub(crate) fn arch_ty(&self, ty: &VecType) -> TokenStream { let scalar = match ty.scalar { ScalarType::Float => "float", ScalarType::Unsigned => "uint", @@ -65,7 +64,7 @@ impl Arch for Neon { } // expects args and return value in arch dialect - fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { + pub(crate) fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { // There is no logical NOT for 64-bit, so we need this workaround. if op == "not" && ty.scalar_bits == 64 && ty.scalar == ScalarType::Mask { return quote! { vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a.into()))) }; diff --git a/fearless_simd_gen/src/arch/wasm.rs b/fearless_simd_gen/src/arch/wasm.rs index dd4db767..dcbbc5b0 100644 --- a/fearless_simd_gen/src/arch/wasm.rs +++ b/fearless_simd_gen/src/arch/wasm.rs @@ -8,7 +8,6 @@ reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] -use crate::arch::Arch; use crate::types::{ScalarType, VecType}; use proc_macro2::{Ident, Span, TokenStream}; use quote::quote; @@ -61,8 +60,8 @@ fn v128_intrinsic(name: &str) -> TokenStream { quote! { #combined_ident } } -impl Arch for Wasm { - fn arch_ty(&self, ty: &VecType) -> TokenStream { +impl Wasm { + pub(crate) fn arch_ty(&self, ty: &VecType) -> TokenStream { let scalar = match ty.scalar { ScalarType::Float => "f", ScalarType::Unsigned => "u", @@ -74,7 +73,7 @@ impl Arch for Wasm { } // expects args and return value in arch dialect - fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { + pub(crate) fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { if let Some(translated) = translate_op(op) { let intrinsic = match translated { "not" => v128_intrinsic(translated), diff --git a/fearless_simd_gen/src/arch/x86.rs b/fearless_simd_gen/src/arch/x86.rs index dcaca2b4..f6d1645d 100644 --- a/fearless_simd_gen/src/arch/x86.rs +++ b/fearless_simd_gen/src/arch/x86.rs @@ -6,7 +6,6 @@ reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] -use crate::arch::Arch; use crate::types::{ScalarType, VecType}; use proc_macro2::{Ident, Span, TokenStream}; use quote::{format_ident, quote}; @@ -39,8 +38,8 @@ pub(crate) fn translate_op(op: &str) -> Option<&'static str> { }) } -impl Arch for X86 { - fn arch_ty(&self, ty: &VecType) -> TokenStream { +impl X86 { + pub(crate) fn arch_ty(&self, ty: &VecType) -> TokenStream { let suffix = match (ty.scalar, ty.scalar_bits) { (ScalarType::Float, 32) => "", (ScalarType::Float, 64) => "d", @@ -52,7 +51,7 @@ impl Arch for X86 { quote! { #ident } } - fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { + pub(crate) fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { if let Some(op_name) = translate_op(op) { let sign_aware = matches!(op, "max" | "min"); diff --git a/fearless_simd_gen/src/mk_avx2.rs b/fearless_simd_gen/src/mk_avx2.rs index 88007383..aea62536 100644 --- a/fearless_simd_gen/src/mk_avx2.rs +++ b/fearless_simd_gen/src/mk_avx2.rs @@ -1,7 +1,6 @@ // Copyright 2025 the Fearless_SIMD Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -use crate::arch::Arch; use crate::arch::x86::{ X86, cast_ident, coarse_type, extend_intrinsic, intrinsic_ident, pack_intrinsic, set1_intrinsic, simple_intrinsic, @@ -87,7 +86,7 @@ fn mk_simd_impl() -> TokenStream { continue; } - let method = make_method(method, sig, vec_ty, X86, vec_ty.n_bits()); + let method = make_method(method, sig, vec_ty, vec_ty.n_bits()); methods.push(method); } @@ -159,13 +158,7 @@ fn mk_type_impl() -> TokenStream { } } -fn make_method( - method: &str, - sig: OpSig, - vec_ty: &VecType, - arch: impl Arch, - ty_bits: usize, -) -> TokenStream { +fn make_method(method: &str, sig: OpSig, vec_ty: &VecType, ty_bits: usize) -> TokenStream { let scalar_bits = vec_ty.scalar_bits; let ty_name = vec_ty.rust_name(); let method_name = format!("{method}_{ty_name}"); @@ -183,12 +176,12 @@ fn make_method( match sig { OpSig::Splat => mk_sse4_2::handle_splat(method_sig, vec_ty, scalar_bits, ty_bits), - OpSig::Compare => handle_compare(method_sig, method, vec_ty, scalar_bits, ty_bits, arch), - OpSig::Unary => mk_sse4_2::handle_unary(method_sig, method, vec_ty, arch), + OpSig::Compare => handle_compare(method_sig, method, vec_ty, scalar_bits, ty_bits), + OpSig::Unary => mk_sse4_2::handle_unary(method_sig, method, vec_ty), OpSig::WidenNarrow(t) => { handle_widen_narrow(method_sig, method, vec_ty, scalar_bits, ty_bits, t) } - OpSig::Binary => mk_sse4_2::handle_binary(method_sig, method, vec_ty, arch), + OpSig::Binary => mk_sse4_2::handle_binary(method_sig, method, vec_ty), OpSig::Shift => mk_sse4_2::handle_shift(method_sig, method, vec_ty, scalar_bits, ty_bits), OpSig::Ternary => match method { "madd" => { @@ -281,7 +274,6 @@ pub(crate) fn handle_compare( vec_ty: &VecType, scalar_bits: usize, ty_bits: usize, - arch: impl Arch, ) -> TokenStream { if vec_ty.scalar == ScalarType::Float { // For AVX2 and up, Intel gives us a generic comparison intrinsic that takes a predicate. There are 32, @@ -305,7 +297,7 @@ pub(crate) fn handle_compare( } } } else { - mk_sse4_2::handle_compare(method_sig, method, vec_ty, scalar_bits, ty_bits, arch) + mk_sse4_2::handle_compare(method_sig, method, vec_ty, scalar_bits, ty_bits) } } diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index 1a94b986..fc6a6aa3 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ b/fearless_simd_gen/src/mk_fallback.rs @@ -6,8 +6,7 @@ reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] -use crate::arch::fallback::Fallback; -use crate::arch::{Arch, fallback}; +use crate::arch::fallback::{self, Fallback}; use crate::generic::{generic_combine, generic_op, generic_split}; use crate::ops::{OpSig, TyFlavor, ops_for_type, reinterpret_ty, valid_reinterpret}; use crate::types::{SIMD_TYPES, ScalarType, VecType, type_imports}; diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index 48ca55e6..c86996cd 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -14,7 +14,6 @@ use crate::arch::neon::split_intrinsic; use crate::ops::{reinterpret_ty, valid_reinterpret}; use crate::types::ScalarType; use crate::{ - arch::Arch, arch::neon::{Neon, cvt_intrinsic, simple_intrinsic}, generic::{generic_combine, generic_op, generic_split}, ops::{OpSig, TyFlavor, ops_for_type}, diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index 1589cb3b..56d2058a 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -1,7 +1,6 @@ // Copyright 2025 the Fearless_SIMD Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -use crate::arch::Arch; use crate::arch::x86::{ X86, cast_ident, coarse_type, cvt_intrinsic, extend_intrinsic, intrinsic_ident, op_suffix, pack_intrinsic, set1_intrinsic, simple_intrinsic, simple_sign_unaware_intrinsic, @@ -87,7 +86,7 @@ fn mk_simd_impl() -> TokenStream { continue; } - let method = make_method(method, sig, vec_ty, X86, 128); + let method = make_method(method, sig, vec_ty, 128); methods.push(method); } @@ -163,13 +162,7 @@ fn mk_type_impl() -> TokenStream { } } -fn make_method( - method: &str, - sig: OpSig, - vec_ty: &VecType, - arch: impl Arch, - ty_bits: usize, -) -> TokenStream { +fn make_method(method: &str, sig: OpSig, vec_ty: &VecType, ty_bits: usize) -> TokenStream { let scalar_bits = vec_ty.scalar_bits; let ty_name = vec_ty.rust_name(); let method_name = format!("{method}_{ty_name}"); @@ -187,12 +180,12 @@ fn make_method( match sig { OpSig::Splat => handle_splat(method_sig, vec_ty, scalar_bits, ty_bits), - OpSig::Compare => handle_compare(method_sig, method, vec_ty, scalar_bits, ty_bits, arch), - OpSig::Unary => handle_unary(method_sig, method, vec_ty, arch), + OpSig::Compare => handle_compare(method_sig, method, vec_ty, scalar_bits, ty_bits), + OpSig::Unary => handle_unary(method_sig, method, vec_ty), OpSig::WidenNarrow(t) => { handle_widen_narrow(method_sig, method, vec_ty, scalar_bits, ty_bits, t) } - OpSig::Binary => handle_binary(method_sig, method, vec_ty, arch), + OpSig::Binary => handle_binary(method_sig, method, vec_ty), OpSig::Shift => handle_shift(method_sig, method, vec_ty, scalar_bits, ty_bits), OpSig::Ternary => handle_ternary(method_sig, &method_ident, method, vec_ty), OpSig::Select => handle_select(method_sig, vec_ty, scalar_bits), @@ -239,7 +232,6 @@ pub(crate) fn handle_compare( vec_ty: &VecType, scalar_bits: usize, ty_bits: usize, - arch: impl Arch, ) -> TokenStream { let args = [quote! { a.into() }, quote! { b.into() }]; @@ -261,7 +253,7 @@ pub(crate) fn handle_compare( ty_bits, ); - let max_min_expr = arch.expr(max_min, vec_ty, &args); + let max_min_expr = X86.expr(max_min, vec_ty, &args); quote! { #eq_intrinsic(#max_min_expr, a.into()) } } "simd_lt" | "simd_gt" => { @@ -306,11 +298,11 @@ pub(crate) fn handle_compare( } } } - "simd_eq" => arch.expr(method, vec_ty, &args), + "simd_eq" => X86.expr(method, vec_ty, &args), _ => unreachable!(), } } else { - let expr = arch.expr(method, vec_ty, &args); + let expr = X86.expr(method, vec_ty, &args); let ident = cast_ident(ScalarType::Float, ScalarType::Mask, scalar_bits, ty_bits); quote! { #ident(#expr) } }; @@ -322,12 +314,7 @@ pub(crate) fn handle_compare( } } -pub(crate) fn handle_unary( - method_sig: TokenStream, - method: &str, - vec_ty: &VecType, - arch: impl Arch, -) -> TokenStream { +pub(crate) fn handle_unary(method_sig: TokenStream, method: &str, vec_ty: &VecType) -> TokenStream { match method { "fract" => { quote! { @@ -345,7 +332,7 @@ pub(crate) fn handle_unary( } _ => { let args = [quote! { a.into() }]; - let expr = arch.expr(method, vec_ty, &args); + let expr = X86.expr(method, vec_ty, &args); quote! { #method_sig { unsafe { #expr.simd_into(self) } @@ -419,7 +406,6 @@ pub(crate) fn handle_binary( method_sig: TokenStream, method: &str, vec_ty: &VecType, - arch: impl Arch, ) -> TokenStream { if method == "mul" && vec_ty.scalar_bits == 8 { // https://stackoverflow.com/questions/8193601/sse-multiplication-16-x-uint8-t @@ -441,7 +427,7 @@ pub(crate) fn handle_binary( } } else { let args = [quote! { a.into() }, quote! { b.into() }]; - let expr = arch.expr(method, vec_ty, &args); + let expr = X86.expr(method, vec_ty, &args); quote! { #method_sig { unsafe { #expr.simd_into(self) } diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index 5004ed60..2b84c571 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -13,7 +13,7 @@ use quote::{format_ident, quote}; use crate::generic::scalar_binary; use crate::ops::valid_reinterpret; use crate::{ - arch::{Arch, wasm::Wasm}, + arch::wasm::Wasm, generic::{generic_combine, generic_op, generic_split}, ops::{OpSig, TyFlavor, ops_for_type}, types::{SIMD_TYPES, ScalarType, type_imports}, From 7e32a6bfe779266527df098e52cc7834255e6b7f Mon Sep 17 00:00:00 2001 From: valadaptive Date: Tue, 11 Nov 2025 23:57:21 -0500 Subject: [PATCH 2/6] Remove redundant arguments from x86 codegen funcs --- fearless_simd_gen/src/mk_avx2.rs | 65 +++++++------- fearless_simd_gen/src/mk_sse4_2.rs | 133 +++++++++++++---------------- 2 files changed, 94 insertions(+), 104 deletions(-) diff --git a/fearless_simd_gen/src/mk_avx2.rs b/fearless_simd_gen/src/mk_avx2.rs index aea62536..52a67da3 100644 --- a/fearless_simd_gen/src/mk_avx2.rs +++ b/fearless_simd_gen/src/mk_avx2.rs @@ -86,7 +86,7 @@ fn mk_simd_impl() -> TokenStream { continue; } - let method = make_method(method, sig, vec_ty, vec_ty.n_bits()); + let method = make_method(method, sig, vec_ty); methods.push(method); } @@ -158,7 +158,7 @@ fn mk_type_impl() -> TokenStream { } } -fn make_method(method: &str, sig: OpSig, vec_ty: &VecType, ty_bits: usize) -> TokenStream { +fn make_method(method: &str, sig: OpSig, vec_ty: &VecType) -> TokenStream { let scalar_bits = vec_ty.scalar_bits; let ty_name = vec_ty.rust_name(); let method_name = format!("{method}_{ty_name}"); @@ -175,14 +175,12 @@ fn make_method(method: &str, sig: OpSig, vec_ty: &VecType, ty_bits: usize) -> To } match sig { - OpSig::Splat => mk_sse4_2::handle_splat(method_sig, vec_ty, scalar_bits, ty_bits), - OpSig::Compare => handle_compare(method_sig, method, vec_ty, scalar_bits, ty_bits), + OpSig::Splat => mk_sse4_2::handle_splat(method_sig, vec_ty), + OpSig::Compare => handle_compare(method_sig, method, vec_ty), OpSig::Unary => mk_sse4_2::handle_unary(method_sig, method, vec_ty), - OpSig::WidenNarrow(t) => { - handle_widen_narrow(method_sig, method, vec_ty, scalar_bits, ty_bits, t) - } + OpSig::WidenNarrow(t) => handle_widen_narrow(method_sig, method, vec_ty, t), OpSig::Binary => mk_sse4_2::handle_binary(method_sig, method, vec_ty), - OpSig::Shift => mk_sse4_2::handle_shift(method_sig, method, vec_ty, scalar_bits, ty_bits), + OpSig::Shift => mk_sse4_2::handle_shift(method_sig, method, vec_ty), OpSig::Ternary => match method { "madd" => { let intrinsic = @@ -204,15 +202,13 @@ fn make_method(method: &str, sig: OpSig, vec_ty: &VecType, ty_bits: usize) -> To } _ => mk_sse4_2::handle_ternary(method_sig, &method_ident, method, vec_ty), }, - OpSig::Select => mk_sse4_2::handle_select(method_sig, vec_ty, scalar_bits), + OpSig::Select => mk_sse4_2::handle_select(method_sig, vec_ty), OpSig::Combine => handle_combine(method_sig, vec_ty), OpSig::Split => handle_split(method_sig, vec_ty), - OpSig::Zip(zip1) => mk_sse4_2::handle_zip(method_sig, vec_ty, scalar_bits, zip1), - OpSig::Unzip(select_even) => { - mk_sse4_2::handle_unzip(method_sig, vec_ty, scalar_bits, select_even) - } + OpSig::Zip(zip1) => mk_sse4_2::handle_zip(method_sig, vec_ty, zip1), + OpSig::Unzip(select_even) => mk_sse4_2::handle_unzip(method_sig, vec_ty, select_even), OpSig::Cvt(scalar, target_scalar_bits) => { - mk_sse4_2::handle_cvt(method_sig, vec_ty, ty_bits, scalar, target_scalar_bits) + mk_sse4_2::handle_cvt(method_sig, vec_ty, scalar, target_scalar_bits) } OpSig::Reinterpret(scalar, target_scalar_bits) => { mk_sse4_2::handle_reinterpret(method_sig, vec_ty, scalar, target_scalar_bits) @@ -272,8 +268,6 @@ pub(crate) fn handle_compare( method_sig: TokenStream, method: &str, vec_ty: &VecType, - scalar_bits: usize, - ty_bits: usize, ) -> TokenStream { if vec_ty.scalar == ScalarType::Float { // For AVX2 and up, Intel gives us a generic comparison intrinsic that takes a predicate. There are 32, @@ -288,8 +282,13 @@ pub(crate) fn handle_compare( "simd_gt" => 0x1E, _ => unreachable!(), }; - let intrinsic = simple_intrinsic("cmp", vec_ty.scalar, scalar_bits, ty_bits); - let cast = cast_ident(ScalarType::Float, ScalarType::Mask, scalar_bits, ty_bits); + let intrinsic = simple_intrinsic("cmp", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); + let cast = cast_ident( + ScalarType::Float, + ScalarType::Mask, + vec_ty.scalar_bits, + vec_ty.n_bits(), + ); quote! { #method_sig { @@ -297,7 +296,7 @@ pub(crate) fn handle_compare( } } } else { - mk_sse4_2::handle_compare(method_sig, method, vec_ty, scalar_bits, ty_bits) + mk_sse4_2::handle_compare(method_sig, method, vec_ty) } } @@ -305,17 +304,19 @@ pub(crate) fn handle_widen_narrow( method_sig: TokenStream, method: &str, vec_ty: &VecType, - scalar_bits: usize, - ty_bits: usize, t: VecType, ) -> TokenStream { let expr = match method { "widen" => { let dst_width = t.n_bits(); - match (dst_width, ty_bits) { + match (dst_width, vec_ty.n_bits()) { (256, 128) => { - let extend = - extend_intrinsic(vec_ty.scalar, scalar_bits, t.scalar_bits, dst_width); + let extend = extend_intrinsic( + vec_ty.scalar, + vec_ty.scalar_bits, + t.scalar_bits, + dst_width, + ); quote! { unsafe { #extend(a.into()).simd_into(self) @@ -323,13 +324,17 @@ pub(crate) fn handle_widen_narrow( } } (512, 256) => { - let extend = - extend_intrinsic(vec_ty.scalar, scalar_bits, t.scalar_bits, ty_bits); + let extend = extend_intrinsic( + vec_ty.scalar, + vec_ty.scalar_bits, + t.scalar_bits, + vec_ty.n_bits(), + ); let combine = format_ident!( "combine_{}", VecType { len: vec_ty.len / 2, - scalar_bits: scalar_bits * 2, + scalar_bits: vec_ty.scalar_bits * 2, ..*vec_ty } .rust_name() @@ -349,7 +354,7 @@ pub(crate) fn handle_widen_narrow( } "narrow" => { let dst_width = t.n_bits(); - match (dst_width, ty_bits) { + match (dst_width, vec_ty.n_bits()) { (128, 256) => { let mask = match t.scalar_bits { 8 => { @@ -369,9 +374,9 @@ pub(crate) fn handle_widen_narrow( } } (256, 512) => { - let mask = set1_intrinsic(vec_ty.scalar, scalar_bits, t.n_bits()); + let mask = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, t.n_bits()); let pack = pack_intrinsic( - scalar_bits, + vec_ty.scalar_bits, matches!(vec_ty.scalar, ScalarType::Int), t.n_bits(), ); diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index 56d2058a..5eba39e4 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -86,7 +86,7 @@ fn mk_simd_impl() -> TokenStream { continue; } - let method = make_method(method, sig, vec_ty, 128); + let method = make_method(method, sig, vec_ty); methods.push(method); } @@ -162,8 +162,7 @@ fn mk_type_impl() -> TokenStream { } } -fn make_method(method: &str, sig: OpSig, vec_ty: &VecType, ty_bits: usize) -> TokenStream { - let scalar_bits = vec_ty.scalar_bits; +fn make_method(method: &str, sig: OpSig, vec_ty: &VecType) -> TokenStream { let ty_name = vec_ty.rust_name(); let method_name = format!("{method}_{ty_name}"); let method_ident = Ident::new(&method_name, Span::call_site()); @@ -179,22 +178,20 @@ fn make_method(method: &str, sig: OpSig, vec_ty: &VecType, ty_bits: usize) -> To } match sig { - OpSig::Splat => handle_splat(method_sig, vec_ty, scalar_bits, ty_bits), - OpSig::Compare => handle_compare(method_sig, method, vec_ty, scalar_bits, ty_bits), + OpSig::Splat => handle_splat(method_sig, vec_ty), + OpSig::Compare => handle_compare(method_sig, method, vec_ty), OpSig::Unary => handle_unary(method_sig, method, vec_ty), - OpSig::WidenNarrow(t) => { - handle_widen_narrow(method_sig, method, vec_ty, scalar_bits, ty_bits, t) - } + OpSig::WidenNarrow(t) => handle_widen_narrow(method_sig, method, vec_ty, t), OpSig::Binary => handle_binary(method_sig, method, vec_ty), - OpSig::Shift => handle_shift(method_sig, method, vec_ty, scalar_bits, ty_bits), + OpSig::Shift => handle_shift(method_sig, method, vec_ty), OpSig::Ternary => handle_ternary(method_sig, &method_ident, method, vec_ty), - OpSig::Select => handle_select(method_sig, vec_ty, scalar_bits), + OpSig::Select => handle_select(method_sig, vec_ty), OpSig::Combine => generic_combine(vec_ty), OpSig::Split => generic_split(vec_ty), - OpSig::Zip(zip1) => handle_zip(method_sig, vec_ty, scalar_bits, zip1), - OpSig::Unzip(select_even) => handle_unzip(method_sig, vec_ty, scalar_bits, select_even), + OpSig::Zip(zip1) => handle_zip(method_sig, vec_ty, zip1), + OpSig::Unzip(select_even) => handle_unzip(method_sig, vec_ty, select_even), OpSig::Cvt(scalar, target_scalar_bits) => { - handle_cvt(method_sig, vec_ty, ty_bits, scalar, target_scalar_bits) + handle_cvt(method_sig, vec_ty, scalar, target_scalar_bits) } OpSig::Reinterpret(scalar, target_scalar_bits) => { handle_reinterpret(method_sig, vec_ty, scalar, target_scalar_bits) @@ -206,13 +203,8 @@ fn make_method(method: &str, sig: OpSig, vec_ty: &VecType, ty_bits: usize) -> To } } -pub(crate) fn handle_splat( - method_sig: TokenStream, - vec_ty: &VecType, - scalar_bits: usize, - ty_bits: usize, -) -> TokenStream { - let intrinsic = set1_intrinsic(vec_ty.scalar, scalar_bits, ty_bits); +pub(crate) fn handle_splat(method_sig: TokenStream, vec_ty: &VecType) -> TokenStream { + let intrinsic = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); let cast = match vec_ty.scalar { ScalarType::Unsigned => quote!(as _), _ => quote!(), @@ -230,8 +222,6 @@ pub(crate) fn handle_compare( method_sig: TokenStream, method: &str, vec_ty: &VecType, - scalar_bits: usize, - ty_bits: usize, ) -> TokenStream { let args = [quote! { a.into() }, quote! { b.into() }]; @@ -244,13 +234,11 @@ pub(crate) fn handle_compare( _ => unreachable!(), }; - // TODO: in some places, we use vec_ty.scalar bits, and in other places, we use the scalar_bits argument. - // AFAIK, these never differ. let eq_intrinsic = simple_sign_unaware_intrinsic( "cmpeq", vec_ty.scalar, vec_ty.scalar_bits, - ty_bits, + vec_ty.n_bits(), ); let max_min_expr = X86.expr(max_min, vec_ty, &args); @@ -261,19 +249,19 @@ pub(crate) fn handle_compare( "cmpgt", vec_ty.scalar, vec_ty.scalar_bits, - ty_bits, + vec_ty.n_bits(), ); if vec_ty.scalar == ScalarType::Unsigned { // SSE4.2 only has signed GT/LT, but not unsigned. - let set = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, ty_bits); + let set = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); let sign = match vec_ty.scalar_bits { 8 => quote! { 0x80u8 }, 16 => quote! { 0x8000u16 }, 32 => quote! { 0x80000000u32 }, _ => unimplemented!(), }; - let xor_op = intrinsic_ident("xor", coarse_type(*vec_ty), ty_bits); + let xor_op = intrinsic_ident("xor", coarse_type(*vec_ty), vec_ty.n_bits()); let args = if method == "simd_lt" { quote! { b_signed, a_signed } } else { @@ -303,7 +291,12 @@ pub(crate) fn handle_compare( } } else { let expr = X86.expr(method, vec_ty, &args); - let ident = cast_ident(ScalarType::Float, ScalarType::Mask, scalar_bits, ty_bits); + let ident = cast_ident( + ScalarType::Float, + ScalarType::Mask, + vec_ty.scalar_bits, + vec_ty.n_bits(), + ); quote! { #ident(#expr) } }; @@ -346,18 +339,21 @@ pub(crate) fn handle_widen_narrow( method_sig: TokenStream, method: &str, vec_ty: &VecType, - scalar_bits: usize, - ty_bits: usize, t: VecType, ) -> TokenStream { match method { "widen" => { - let extend = extend_intrinsic(vec_ty.scalar, scalar_bits, t.scalar_bits, ty_bits); + let extend = extend_intrinsic( + vec_ty.scalar, + vec_ty.scalar_bits, + t.scalar_bits, + vec_ty.n_bits(), + ); let combine = format_ident!( "combine_{}", VecType { len: vec_ty.len / 2, - scalar_bits: scalar_bits * 2, + scalar_bits: vec_ty.scalar_bits * 2, ..*vec_ty } .rust_name() @@ -376,9 +372,9 @@ pub(crate) fn handle_widen_narrow( } } "narrow" => { - let mask = set1_intrinsic(vec_ty.scalar, scalar_bits, t.n_bits()); + let mask = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, t.n_bits()); let pack = pack_intrinsic( - scalar_bits, + vec_ty.scalar_bits, matches!(vec_ty.scalar, ScalarType::Int), t.n_bits(), ); @@ -436,23 +432,18 @@ pub(crate) fn handle_binary( } } -pub(crate) fn handle_shift( - method_sig: TokenStream, - method: &str, - vec_ty: &VecType, - scalar_bits: usize, - ty_bits: usize, -) -> TokenStream { +pub(crate) fn handle_shift(method_sig: TokenStream, method: &str, vec_ty: &VecType) -> TokenStream { let op = match (method, vec_ty.scalar) { ("shr", ScalarType::Unsigned) => "srl", ("shr", ScalarType::Int) => "sra", ("shl", _) => "sll", _ => unreachable!(), }; - let suffix = op_suffix(vec_ty.scalar, scalar_bits.max(16), false); + let ty_bits = vec_ty.n_bits(); + let suffix = op_suffix(vec_ty.scalar, vec_ty.scalar_bits.max(16), false); let shift_intrinsic = intrinsic_ident(op, suffix, ty_bits); - if scalar_bits == 8 { + if vec_ty.scalar_bits == 8 { // SSE doesn't have shifting for 8-bit, so we first convert into // 16 bit, shift, and then back to 8-bit @@ -540,11 +531,7 @@ pub(crate) fn handle_ternary( } } -pub(crate) fn handle_select( - method_sig: TokenStream, - vec_ty: &VecType, - scalar_bits: usize, -) -> TokenStream { +pub(crate) fn handle_select(method_sig: TokenStream, vec_ty: &VecType) -> TokenStream { // Our select ops' argument order is mask, a, b; Intel's intrinsics are b, a, mask let args = [ quote! { c.into() }, @@ -554,7 +541,7 @@ pub(crate) fn handle_select( let ident = cast_ident( ScalarType::Mask, ScalarType::Float, - scalar_bits, + vec_ty.scalar_bits, vec_ty.n_bits(), ); quote! { #ident(a.into()) } @@ -571,24 +558,19 @@ pub(crate) fn handle_select( } } -pub(crate) fn handle_zip( - method_sig: TokenStream, - vec_ty: &VecType, - scalar_bits: usize, - zip1: bool, -) -> TokenStream { +pub(crate) fn handle_zip(method_sig: TokenStream, vec_ty: &VecType, zip1: bool) -> TokenStream { let expr = match vec_ty.n_bits() { 128 => { let op = if zip1 { "unpacklo" } else { "unpackhi" }; - let suffix = op_suffix(vec_ty.scalar, scalar_bits, false); + let suffix = op_suffix(vec_ty.scalar, vec_ty.scalar_bits, false); let unpack_intrinsic = intrinsic_ident(op, suffix, vec_ty.n_bits()); quote! { unsafe { #unpack_intrinsic(a.into(), b.into()).simd_into(self) } } } 256 => { - let suffix = op_suffix(vec_ty.scalar, scalar_bits, false); + let suffix = op_suffix(vec_ty.scalar, vec_ty.scalar_bits, false); let lo = intrinsic_ident("unpacklo", suffix, vec_ty.n_bits()); let hi = intrinsic_ident("unpackhi", suffix, vec_ty.n_bits()); let shuffle_immediate = if zip1 { @@ -628,13 +610,12 @@ pub(crate) fn handle_zip( pub(crate) fn handle_unzip( method_sig: TokenStream, vec_ty: &VecType, - scalar_bits: usize, select_even: bool, ) -> TokenStream { - let expr = match (vec_ty.scalar, vec_ty.n_bits(), scalar_bits) { + let expr = match (vec_ty.scalar, vec_ty.n_bits(), vec_ty.scalar_bits) { (ScalarType::Float, 128, _) => { // 128-bit shuffle of floats or doubles; there are built-in SSE intrinsics for this - let suffix = op_suffix(vec_ty.scalar, scalar_bits, false); + let suffix = op_suffix(vec_ty.scalar, vec_ty.scalar_bits, false); let intrinsic = intrinsic_ident("shuffle", suffix, vec_ty.n_bits()); let mask = match (vec_ty.scalar_bits, select_even) { @@ -663,7 +644,7 @@ pub(crate) fn handle_unzip( } (ScalarType::Int | ScalarType::Mask | ScalarType::Unsigned, 128, 16 | 8) => { // Separate out the even-indexed and odd-indexed elements - let mask = match scalar_bits { + let mask = match vec_ty.scalar_bits { 8 => { quote! { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 } } @@ -697,14 +678,14 @@ pub(crate) fn handle_unzip( // First we perform a lane-crossing shuffle to move the even-indexed elements of each input to the lower // half, and the odd-indexed ones to the upper half. // e.g. [0, 1, 2, 3, 4, 5, 6, 7] becomes [0, 2, 4, 6, 1, 3, 5, 7]). - let low_shuffle_kind = match scalar_bits { + let low_shuffle_kind = match vec_ty.scalar_bits { 32 => "permutevar8x32", 64 => "permute4x64", _ => unreachable!(), }; - let low_shuffle_suffix = op_suffix(vec_ty.scalar, scalar_bits, false); + let low_shuffle_suffix = op_suffix(vec_ty.scalar, vec_ty.scalar_bits, false); let low_shuffle_intrinsic = intrinsic_ident(low_shuffle_kind, low_shuffle_suffix, 256); - let low_shuffle = |input_name: TokenStream| match scalar_bits { + let low_shuffle = |input_name: TokenStream| match vec_ty.scalar_bits { 32 => { quote! { #low_shuffle_intrinsic(#input_name, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7)) } } @@ -740,7 +721,7 @@ pub(crate) fn handle_unzip( } (_, 256, 16 | 8) => { // Separate out the even-indexed and odd-indexed elements within each 128-bit lane - let mask = match scalar_bits { + let mask = match vec_ty.scalar_bits { 8 => { quote! { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 } } @@ -786,20 +767,24 @@ pub(crate) fn handle_unzip( pub(crate) fn handle_cvt( method_sig: TokenStream, vec_ty: &VecType, - ty_bits: usize, - scalar: ScalarType, - scalar_bits: usize, + target_scalar: ScalarType, + target_scalar_bits: usize, ) -> TokenStream { // IMPORTANT TODO: for f32 to u32, we are currently converting it to i32 instead // of u32. We need to properly polyfill this. - let cvt_intrinsic = cvt_intrinsic(*vec_ty, VecType::new(scalar, scalar_bits, vec_ty.len)); + let cvt_intrinsic = cvt_intrinsic( + *vec_ty, + VecType::new(target_scalar, target_scalar_bits, vec_ty.len), + ); let expr = if vec_ty.scalar == ScalarType::Float { - let floor_intrinsic = simple_intrinsic("floor", vec_ty.scalar, vec_ty.scalar_bits, ty_bits); - let max_intrinsic = simple_intrinsic("max", vec_ty.scalar, vec_ty.scalar_bits, ty_bits); - let set = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, ty_bits); + let floor_intrinsic = + simple_intrinsic("floor", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); + let max_intrinsic = + simple_intrinsic("max", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); + let set = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); - if scalar == ScalarType::Unsigned { + if target_scalar == ScalarType::Unsigned { quote! { #max_intrinsic(#floor_intrinsic(a.into()), #set(0.0)) } } else { quote! { a.trunc().into() } From 4f614c2a9127639d43e1f02e792573083b80f6f7 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Wed, 12 Nov 2025 00:09:24 -0500 Subject: [PATCH 3/6] Take `&VecType` in more codegen methods --- fearless_simd_gen/src/arch/x86.rs | 47 ++++++++++++------------------ fearless_simd_gen/src/mk_avx2.rs | 14 +++++---- fearless_simd_gen/src/mk_sse4_2.rs | 32 ++++++++------------ 3 files changed, 39 insertions(+), 54 deletions(-) diff --git a/fearless_simd_gen/src/arch/x86.rs b/fearless_simd_gen/src/arch/x86.rs index f6d1645d..648cec74 100644 --- a/fearless_simd_gen/src/arch/x86.rs +++ b/fearless_simd_gen/src/arch/x86.rs @@ -71,16 +71,15 @@ impl X86 { } "neg" => match ty.scalar { ScalarType::Float => { - let set1 = set1_intrinsic(ty.scalar, ty.scalar_bits, ty.n_bits()); - let xor = - simple_intrinsic("xor", ScalarType::Float, ty.scalar_bits, ty.n_bits()); + let set1 = set1_intrinsic(ty); + let xor = simple_intrinsic("xor", ty); quote! { #( #xor(#args, #set1(-0.0)) )* } } ScalarType::Int => { let set0 = intrinsic_ident("setzero", coarse_type(*ty), ty.n_bits()); - let sub = simple_intrinsic("sub", ty.scalar, ty.scalar_bits, ty.n_bits()); + let sub = simple_intrinsic("sub", ty); let arg = &args[0]; quote! { #sub(#set0(), #arg) @@ -89,9 +88,8 @@ impl X86 { _ => unreachable!(), }, "abs" => { - let set1 = set1_intrinsic(ty.scalar, ty.scalar_bits, ty.n_bits()); - let andnot = - simple_intrinsic("andnot", ScalarType::Float, ty.scalar_bits, ty.n_bits()); + let set1 = set1_intrinsic(ty); + let andnot = simple_intrinsic("andnot", ty); quote! { #( #andnot(#set1(-0.0), #args) )* } @@ -99,12 +97,10 @@ impl X86 { "copysign" => { let a = &args[0]; let b = &args[1]; - let set1 = set1_intrinsic(ty.scalar, ty.scalar_bits, ty.n_bits()); - let and = - simple_intrinsic("and", ScalarType::Float, ty.scalar_bits, ty.n_bits()); - let andnot = - simple_intrinsic("andnot", ScalarType::Float, ty.scalar_bits, ty.n_bits()); - let or = simple_intrinsic("or", ScalarType::Float, ty.scalar_bits, ty.n_bits()); + let set1 = set1_intrinsic(ty); + let and = simple_intrinsic("and", ty); + let andnot = simple_intrinsic("andnot", ty); + let or = simple_intrinsic("or", ty); quote! { let mask = #set1(-0.0); #or(#and(mask, #b), #andnot(mask, #a)) @@ -167,31 +163,26 @@ pub(crate) fn coarse_type(vec_ty: VecType) -> &'static str { } } -pub(crate) fn set1_intrinsic(ty: ScalarType, bits: usize, ty_bits: usize) -> Ident { +pub(crate) fn set1_intrinsic(vec_ty: &VecType) -> Ident { use ScalarType::*; - let suffix = match (ty, bits) { + let suffix = match (vec_ty.scalar, vec_ty.scalar_bits) { (Int | Unsigned | Mask, 64) => "epi64x", - _ => op_suffix(ty, bits, false), + (scalar, bits) => op_suffix(scalar, bits, false), }; - intrinsic_ident("set1", suffix, ty_bits) + intrinsic_ident("set1", suffix, vec_ty.n_bits()) } -pub(crate) fn simple_intrinsic(name: &str, ty: ScalarType, bits: usize, ty_bits: usize) -> Ident { - let suffix = op_suffix(ty, bits, true); +pub(crate) fn simple_intrinsic(name: &str, vec_ty: &VecType) -> Ident { + let suffix = op_suffix(vec_ty.scalar, vec_ty.scalar_bits, true); - intrinsic_ident(name, suffix, ty_bits) + intrinsic_ident(name, suffix, vec_ty.n_bits()) } -pub(crate) fn simple_sign_unaware_intrinsic( - name: &str, - ty: ScalarType, - bits: usize, - ty_bits: usize, -) -> Ident { - let suffix = op_suffix(ty, bits, false); +pub(crate) fn simple_sign_unaware_intrinsic(name: &str, vec_ty: &VecType) -> Ident { + let suffix = op_suffix(vec_ty.scalar, vec_ty.scalar_bits, false); - intrinsic_ident(name, suffix, ty_bits) + intrinsic_ident(name, suffix, vec_ty.n_bits()) } pub(crate) fn extend_intrinsic( diff --git a/fearless_simd_gen/src/mk_avx2.rs b/fearless_simd_gen/src/mk_avx2.rs index 52a67da3..98aa7b4d 100644 --- a/fearless_simd_gen/src/mk_avx2.rs +++ b/fearless_simd_gen/src/mk_avx2.rs @@ -183,8 +183,7 @@ fn make_method(method: &str, sig: OpSig, vec_ty: &VecType) -> TokenStream { OpSig::Shift => mk_sse4_2::handle_shift(method_sig, method, vec_ty), OpSig::Ternary => match method { "madd" => { - let intrinsic = - simple_intrinsic("fmadd", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); + let intrinsic = simple_intrinsic("fmadd", vec_ty); quote! { #method_sig { unsafe { #intrinsic(a.into(), b.into(), c.into()).simd_into(self) } @@ -192,8 +191,7 @@ fn make_method(method: &str, sig: OpSig, vec_ty: &VecType) -> TokenStream { } } "msub" => { - let intrinsic = - simple_intrinsic("fmsub", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); + let intrinsic = simple_intrinsic("fmsub", vec_ty); quote! { #method_sig { unsafe { #intrinsic(a.into(), b.into(), c.into()).simd_into(self) } @@ -282,7 +280,7 @@ pub(crate) fn handle_compare( "simd_gt" => 0x1E, _ => unreachable!(), }; - let intrinsic = simple_intrinsic("cmp", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); + let intrinsic = simple_intrinsic("cmp", vec_ty); let cast = cast_ident( ScalarType::Float, ScalarType::Mask, @@ -374,7 +372,11 @@ pub(crate) fn handle_widen_narrow( } } (256, 512) => { - let mask = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, t.n_bits()); + let mask = set1_intrinsic(&VecType::new( + vec_ty.scalar, + vec_ty.scalar_bits, + vec_ty.len / 2, + )); let pack = pack_intrinsic( vec_ty.scalar_bits, matches!(vec_ty.scalar, ScalarType::Int), diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index 5eba39e4..edd2448b 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -204,7 +204,7 @@ fn make_method(method: &str, sig: OpSig, vec_ty: &VecType) -> TokenStream { } pub(crate) fn handle_splat(method_sig: TokenStream, vec_ty: &VecType) -> TokenStream { - let intrinsic = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); + let intrinsic = set1_intrinsic(vec_ty); let cast = match vec_ty.scalar { ScalarType::Unsigned => quote!(as _), _ => quote!(), @@ -234,27 +234,17 @@ pub(crate) fn handle_compare( _ => unreachable!(), }; - let eq_intrinsic = simple_sign_unaware_intrinsic( - "cmpeq", - vec_ty.scalar, - vec_ty.scalar_bits, - vec_ty.n_bits(), - ); + let eq_intrinsic = simple_sign_unaware_intrinsic("cmpeq", vec_ty); let max_min_expr = X86.expr(max_min, vec_ty, &args); quote! { #eq_intrinsic(#max_min_expr, a.into()) } } "simd_lt" | "simd_gt" => { - let gt = simple_sign_unaware_intrinsic( - "cmpgt", - vec_ty.scalar, - vec_ty.scalar_bits, - vec_ty.n_bits(), - ); + let gt = simple_sign_unaware_intrinsic("cmpgt", vec_ty); if vec_ty.scalar == ScalarType::Unsigned { // SSE4.2 only has signed GT/LT, but not unsigned. - let set = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); + let set = set1_intrinsic(vec_ty); let sign = match vec_ty.scalar_bits { 8 => quote! { 0x80u8 }, 16 => quote! { 0x8000u16 }, @@ -372,7 +362,11 @@ pub(crate) fn handle_widen_narrow( } } "narrow" => { - let mask = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, t.n_bits()); + let mask = set1_intrinsic(&VecType::new( + vec_ty.scalar, + vec_ty.scalar_bits, + vec_ty.len / 2, + )); let pack = pack_intrinsic( vec_ty.scalar_bits, matches!(vec_ty.scalar, ScalarType::Int), @@ -778,11 +772,9 @@ pub(crate) fn handle_cvt( ); let expr = if vec_ty.scalar == ScalarType::Float { - let floor_intrinsic = - simple_intrinsic("floor", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); - let max_intrinsic = - simple_intrinsic("max", vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); - let set = set1_intrinsic(vec_ty.scalar, vec_ty.scalar_bits, vec_ty.n_bits()); + let floor_intrinsic = simple_intrinsic("floor", vec_ty); + let max_intrinsic = simple_intrinsic("max", vec_ty); + let set = set1_intrinsic(vec_ty); if target_scalar == ScalarType::Unsigned { quote! { #max_intrinsic(#floor_intrinsic(a.into()), #set(0.0)) } From 400938e9d3556a9578771315ba52182f3773ca14 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Wed, 12 Nov 2025 00:10:41 -0500 Subject: [PATCH 4/6] Consistently take VecType by reference --- fearless_simd_gen/src/arch/x86.rs | 12 ++++++------ fearless_simd_gen/src/mk_avx2.rs | 2 +- fearless_simd_gen/src/mk_sse4_2.rs | 16 ++++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fearless_simd_gen/src/arch/x86.rs b/fearless_simd_gen/src/arch/x86.rs index 648cec74..00b4ae81 100644 --- a/fearless_simd_gen/src/arch/x86.rs +++ b/fearless_simd_gen/src/arch/x86.rs @@ -56,7 +56,7 @@ impl X86 { let sign_aware = matches!(op, "max" | "min"); let suffix = match op_name { - "and" | "or" | "xor" => coarse_type(*ty), + "and" | "or" | "xor" => coarse_type(ty), "blendv" if ty.scalar != ScalarType::Float => "epi8", _ => op_suffix(ty.scalar, ty.scalar_bits, sign_aware), }; @@ -78,7 +78,7 @@ impl X86 { } } ScalarType::Int => { - let set0 = intrinsic_ident("setzero", coarse_type(*ty), ty.n_bits()); + let set0 = intrinsic_ident("setzero", coarse_type(ty), ty.n_bits()); let sub = simple_intrinsic("sub", ty); let arg = &args[0]; quote! { @@ -153,7 +153,7 @@ pub(crate) fn op_suffix(mut ty: ScalarType, bits: usize, sign_aware: bool) -> &' } /// Intrinsic name for the "int, float, or double" type (not as fine-grained as [`op_suffix`]). -pub(crate) fn coarse_type(vec_ty: VecType) -> &'static str { +pub(crate) fn coarse_type(vec_ty: &VecType) -> &'static str { use ScalarType::*; match (vec_ty.scalar, vec_ty.n_bits()) { (Int | Unsigned | Mask, 128) => "si128", @@ -197,7 +197,7 @@ pub(crate) fn extend_intrinsic( intrinsic_ident(&format!("cvt{from_suffix}"), to_suffix, ty_bits) } -pub(crate) fn cvt_intrinsic(from: VecType, to: VecType) -> Ident { +pub(crate) fn cvt_intrinsic(from: &VecType, to: &VecType) -> Ident { let from_suffix = op_suffix(from.scalar, from.scalar_bits, false); let to_suffix = op_suffix(to.scalar, to.scalar_bits, false); @@ -250,12 +250,12 @@ pub(crate) fn cast_ident( 512 => "512", _ => unreachable!(), }; - let src_name = coarse_type(VecType::new( + let src_name = coarse_type(&VecType::new( src_scalar_ty, scalar_bits, ty_bits / scalar_bits, )); - let dst_name = coarse_type(VecType::new( + let dst_name = coarse_type(&VecType::new( dst_scalar_ty, scalar_bits, ty_bits / scalar_bits, diff --git a/fearless_simd_gen/src/mk_avx2.rs b/fearless_simd_gen/src/mk_avx2.rs index 98aa7b4d..26eaf22c 100644 --- a/fearless_simd_gen/src/mk_avx2.rs +++ b/fearless_simd_gen/src/mk_avx2.rs @@ -226,7 +226,7 @@ pub(crate) fn handle_split(method_sig: TokenStream, vec_ty: &VecType) -> TokenSt ScalarType::Float => "extractf128", _ => "extracti128", }; - let extract_intrinsic = intrinsic_ident(extract_op, coarse_type(*vec_ty), 256); + let extract_intrinsic = intrinsic_ident(extract_op, coarse_type(vec_ty), 256); quote! { #method_sig { unsafe { diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index edd2448b..3b8aee25 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -251,7 +251,7 @@ pub(crate) fn handle_compare( 32 => quote! { 0x80000000u32 }, _ => unimplemented!(), }; - let xor_op = intrinsic_ident("xor", coarse_type(*vec_ty), vec_ty.n_bits()); + let xor_op = intrinsic_ident("xor", coarse_type(vec_ty), vec_ty.n_bits()); let args = if method == "simd_lt" { quote! { b_signed, a_signed } } else { @@ -401,8 +401,8 @@ pub(crate) fn handle_binary( // https://stackoverflow.com/questions/8193601/sse-multiplication-16-x-uint8-t let mullo = intrinsic_ident("mullo", "epi16", vec_ty.n_bits()); let set1 = intrinsic_ident("set1", "epi16", vec_ty.n_bits()); - let and = intrinsic_ident("and", coarse_type(*vec_ty), vec_ty.n_bits()); - let or = intrinsic_ident("or", coarse_type(*vec_ty), vec_ty.n_bits()); + let and = intrinsic_ident("and", coarse_type(vec_ty), vec_ty.n_bits()); + let or = intrinsic_ident("or", coarse_type(vec_ty), vec_ty.n_bits()); let slli = intrinsic_ident("slli", "epi16", vec_ty.n_bits()); let srli = intrinsic_ident("srli", "epi16", vec_ty.n_bits()); quote! { @@ -444,7 +444,7 @@ pub(crate) fn handle_shift(method_sig: TokenStream, method: &str, vec_ty: &VecTy let unpack_hi = unpack_intrinsic(ScalarType::Int, 8, false, ty_bits); let unpack_lo = unpack_intrinsic(ScalarType::Int, 8, true, ty_bits); - let set0 = intrinsic_ident("setzero", coarse_type(*vec_ty), ty_bits); + let set0 = intrinsic_ident("setzero", coarse_type(vec_ty), ty_bits); let extend_expr = |expr| match vec_ty.scalar { ScalarType::Unsigned => quote! { #expr(val, #set0()) @@ -578,7 +578,7 @@ pub(crate) fn handle_zip(method_sig: TokenStream, vec_ty: &VecType, zip1: bool) ScalarType::Float => "permute2f128", _ => "permute2x128", }, - coarse_type(*vec_ty), + coarse_type(vec_ty), 256, ); @@ -695,7 +695,7 @@ pub(crate) fn handle_unzip( ScalarType::Float => "permute2f128", _ => "permute2x128", }, - coarse_type(*vec_ty), + coarse_type(vec_ty), 256, ); let high_shuffle_immediate = if select_even { @@ -767,8 +767,8 @@ pub(crate) fn handle_cvt( // IMPORTANT TODO: for f32 to u32, we are currently converting it to i32 instead // of u32. We need to properly polyfill this. let cvt_intrinsic = cvt_intrinsic( - *vec_ty, - VecType::new(target_scalar, target_scalar_bits, vec_ty.len), + vec_ty, + &VecType::new(target_scalar, target_scalar_bits, vec_ty.len), ); let expr = if vec_ty.scalar == ScalarType::Float { From a4b936c6c2abe8b2bd48d159ed3118fa326d83b6 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 13 Nov 2025 19:42:39 -0500 Subject: [PATCH 5/6] Remove the Arch trait even more --- fearless_simd_gen/src/arch/fallback.rs | 40 +++---- fearless_simd_gen/src/arch/neon.rs | 84 +++++++------ fearless_simd_gen/src/arch/wasm.rs | 57 ++++----- fearless_simd_gen/src/arch/x86.rs | 159 ++++++++++++------------- fearless_simd_gen/src/mk_avx2.rs | 4 +- fearless_simd_gen/src/mk_fallback.rs | 14 +-- fearless_simd_gen/src/mk_neon.rs | 22 ++-- fearless_simd_gen/src/mk_sse4_2.rs | 18 +-- fearless_simd_gen/src/mk_wasm.rs | 12 +- 9 files changed, 194 insertions(+), 216 deletions(-) diff --git a/fearless_simd_gen/src/arch/fallback.rs b/fearless_simd_gen/src/arch/fallback.rs index e976f39b..62ebaa47 100644 --- a/fearless_simd_gen/src/arch/fallback.rs +++ b/fearless_simd_gen/src/arch/fallback.rs @@ -64,34 +64,30 @@ pub(crate) fn translate_op(op: &str, is_float: bool) -> Option<&'static str> { } pub fn simple_intrinsic(name: &str, ty: &VecType) -> TokenStream { - let ty_prefix = Fallback.arch_ty(ty); + let ty_prefix = arch_ty(ty); let ident = Ident::new(name, Span::call_site()); quote! {#ty_prefix::#ident} } -pub struct Fallback; - -impl Fallback { - pub(crate) fn arch_ty(&self, ty: &VecType) -> TokenStream { - let scalar = match ty.scalar { - ScalarType::Float => "f", - ScalarType::Unsigned => "u", - ScalarType::Int | ScalarType::Mask => "i", - }; - let name = format!("{}{}", scalar, ty.scalar_bits); - let ident = Ident::new(&name, Span::call_site()); - quote! { #ident } - } +pub(crate) fn arch_ty(ty: &VecType) -> TokenStream { + let scalar = match ty.scalar { + ScalarType::Float => "f", + ScalarType::Unsigned => "u", + ScalarType::Int | ScalarType::Mask => "i", + }; + let name = format!("{}{}", scalar, ty.scalar_bits); + let ident = Ident::new(&name, Span::call_site()); + quote! { #ident } +} - pub(crate) fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { - if let Some(translated) = translate_op(op, ty.scalar == ScalarType::Float) { - let intrinsic = simple_intrinsic(translated, ty); - quote! { #intrinsic ( #( #args ),* ) } - } else { - match op { - _ => unimplemented!("missing {op}"), - } +pub(crate) fn expr(op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { + if let Some(translated) = translate_op(op, ty.scalar == ScalarType::Float) { + let intrinsic = simple_intrinsic(translated, ty); + quote! { #intrinsic ( #( #args ),* ) } + } else { + match op { + _ => unimplemented!("missing {op}"), } } } diff --git a/fearless_simd_gen/src/arch/neon.rs b/fearless_simd_gen/src/arch/neon.rs index 0a55550a..6257a5a4 100644 --- a/fearless_simd_gen/src/arch/neon.rs +++ b/fearless_simd_gen/src/arch/neon.rs @@ -10,8 +10,6 @@ use crate::types::{ScalarType, VecType}; use proc_macro2::{Ident, Span, TokenStream}; use quote::quote; -pub struct Neon; - fn translate_op(op: &str) -> Option<&'static str> { Some(match op { "abs" => "vabs", @@ -45,54 +43,52 @@ fn translate_op(op: &str) -> Option<&'static str> { }) } -impl Neon { - pub(crate) fn arch_ty(&self, ty: &VecType) -> TokenStream { - let scalar = match ty.scalar { - ScalarType::Float => "float", - ScalarType::Unsigned => "uint", - ScalarType::Int | ScalarType::Mask => "int", - }; - let name = if ty.n_bits() == 256 { - format!("{}{}x{}x2_t", scalar, ty.scalar_bits, ty.len / 2) - } else if ty.n_bits() == 512 { - format!("{}{}x{}x4_t", scalar, ty.scalar_bits, ty.len / 4) - } else { - format!("{}{}x{}_t", scalar, ty.scalar_bits, ty.len) - }; - let ident = Ident::new(&name, Span::call_site()); - quote! { #ident } - } +pub(crate) fn arch_ty(ty: &VecType) -> TokenStream { + let scalar = match ty.scalar { + ScalarType::Float => "float", + ScalarType::Unsigned => "uint", + ScalarType::Int | ScalarType::Mask => "int", + }; + let name = if ty.n_bits() == 256 { + format!("{}{}x{}x2_t", scalar, ty.scalar_bits, ty.len / 2) + } else if ty.n_bits() == 512 { + format!("{}{}x{}x4_t", scalar, ty.scalar_bits, ty.len / 4) + } else { + format!("{}{}x{}_t", scalar, ty.scalar_bits, ty.len) + }; + let ident = Ident::new(&name, Span::call_site()); + quote! { #ident } +} - // expects args and return value in arch dialect - pub(crate) fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { - // There is no logical NOT for 64-bit, so we need this workaround. - if op == "not" && ty.scalar_bits == 64 && ty.scalar == ScalarType::Mask { - return quote! { vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a.into()))) }; - } +// expects args and return value in arch dialect +pub(crate) fn expr(op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { + // There is no logical NOT for 64-bit, so we need this workaround. + if op == "not" && ty.scalar_bits == 64 && ty.scalar == ScalarType::Mask { + return quote! { vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a.into()))) }; + } - if let Some(xlat) = translate_op(op) { - let intrinsic = simple_intrinsic(xlat, ty); - return quote! { #intrinsic ( #( #args ),* ) }; + if let Some(xlat) = translate_op(op) { + let intrinsic = simple_intrinsic(xlat, ty); + return quote! { #intrinsic ( #( #args ),* ) }; + } + match op { + "splat" => { + let intrinsic = split_intrinsic("vdup", "n", ty); + quote! { #intrinsic ( #( #args ),* ) } } - match op { - "splat" => { - let intrinsic = split_intrinsic("vdup", "n", ty); - quote! { #intrinsic ( #( #args ),* ) } - } - "fract" => { - let to = VecType::new(ScalarType::Int, ty.scalar_bits, ty.len); - let c1 = cvt_intrinsic("vcvt", &to, ty); - let c2 = cvt_intrinsic("vcvt", ty, &to); - let sub = simple_intrinsic("vsub", ty); - quote! { - let c1 = #c1(a.into()); - let c2 = #c2(c1); + "fract" => { + let to = VecType::new(ScalarType::Int, ty.scalar_bits, ty.len); + let c1 = cvt_intrinsic("vcvt", &to, ty); + let c2 = cvt_intrinsic("vcvt", ty, &to); + let sub = simple_intrinsic("vsub", ty); + quote! { + let c1 = #c1(a.into()); + let c2 = #c2(c1); - #sub(a.into(), c2) - } + #sub(a.into(), c2) } - _ => unimplemented!("missing {op}"), } + _ => unimplemented!("missing {op}"), } } diff --git a/fearless_simd_gen/src/arch/wasm.rs b/fearless_simd_gen/src/arch/wasm.rs index dcbbc5b0..1898817d 100644 --- a/fearless_simd_gen/src/arch/wasm.rs +++ b/fearless_simd_gen/src/arch/wasm.rs @@ -4,7 +4,6 @@ #![expect( clippy::match_single_binding, clippy::uninlined_format_args, - unreachable_pub, reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] @@ -12,8 +11,6 @@ use crate::types::{ScalarType, VecType}; use proc_macro2::{Ident, Span, TokenStream}; use quote::quote; -pub struct Wasm; - fn translate_op(op: &str) -> Option<&'static str> { Some(match op { "abs" => "abs", @@ -47,7 +44,7 @@ fn translate_op(op: &str) -> Option<&'static str> { } fn simple_intrinsic(name: &str, ty: &VecType) -> TokenStream { - let ty_prefix = Wasm.arch_ty(ty); + let ty_prefix = arch_ty(ty); let ident = Ident::new(name, Span::call_site()); let combined_ident = Ident::new(&format!("{}_{}", ty_prefix, ident), Span::call_site()); quote! { #combined_ident } @@ -60,35 +57,33 @@ fn v128_intrinsic(name: &str) -> TokenStream { quote! { #combined_ident } } -impl Wasm { - pub(crate) fn arch_ty(&self, ty: &VecType) -> TokenStream { - let scalar = match ty.scalar { - ScalarType::Float => "f", - ScalarType::Unsigned => "u", - ScalarType::Int | ScalarType::Mask => "i", - }; - let name = format!("{}{}x{}", scalar, ty.scalar_bits, ty.len); - let ident = Ident::new(&name, Span::call_site()); - quote! { #ident } - } +pub(crate) fn arch_ty(ty: &VecType) -> TokenStream { + let scalar = match ty.scalar { + ScalarType::Float => "f", + ScalarType::Unsigned => "u", + ScalarType::Int | ScalarType::Mask => "i", + }; + let name = format!("{}{}x{}", scalar, ty.scalar_bits, ty.len); + let ident = Ident::new(&name, Span::call_site()); + quote! { #ident } +} - // expects args and return value in arch dialect - pub(crate) fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { - if let Some(translated) = translate_op(op) { - let intrinsic = match translated { - "not" => v128_intrinsic(translated), - "and" => v128_intrinsic(translated), - "or" => v128_intrinsic(translated), - "xor" => v128_intrinsic(translated), - _ => simple_intrinsic(translated, ty), - }; +// expects args and return value in arch dialect +pub(crate) fn expr(op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { + if let Some(translated) = translate_op(op) { + let intrinsic = match translated { + "not" => v128_intrinsic(translated), + "and" => v128_intrinsic(translated), + "or" => v128_intrinsic(translated), + "xor" => v128_intrinsic(translated), + _ => simple_intrinsic(translated, ty), + }; - quote! { #intrinsic ( #( #args ),* ) } - } else { - match op { - // Add any special case operations here if needed - _ => unimplemented!("missing {op}"), - } + quote! { #intrinsic ( #( #args ),* ) } + } else { + match op { + // Add any special case operations here if needed + _ => unimplemented!("missing {op}"), } } } diff --git a/fearless_simd_gen/src/arch/x86.rs b/fearless_simd_gen/src/arch/x86.rs index 00b4ae81..809a1732 100644 --- a/fearless_simd_gen/src/arch/x86.rs +++ b/fearless_simd_gen/src/arch/x86.rs @@ -1,17 +1,10 @@ // Copyright 2025 the Fearless_SIMD Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -#![expect( - unreachable_pub, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] - use crate::types::{ScalarType, VecType}; use proc_macro2::{Ident, Span, TokenStream}; use quote::{format_ident, quote}; -pub struct X86; - pub(crate) fn translate_op(op: &str) -> Option<&'static str> { Some(match op { "floor" => "floor", @@ -38,95 +31,93 @@ pub(crate) fn translate_op(op: &str) -> Option<&'static str> { }) } -impl X86 { - pub(crate) fn arch_ty(&self, ty: &VecType) -> TokenStream { - let suffix = match (ty.scalar, ty.scalar_bits) { - (ScalarType::Float, 32) => "", - (ScalarType::Float, 64) => "d", - (ScalarType::Float, _) => unimplemented!(), - (ScalarType::Unsigned | ScalarType::Int | ScalarType::Mask, _) => "i", - }; - let name = format!("__m{}{}", ty.scalar_bits * ty.len, suffix); - let ident = Ident::new(&name, Span::call_site()); - quote! { #ident } - } +pub(crate) fn arch_ty(ty: &VecType) -> TokenStream { + let suffix = match (ty.scalar, ty.scalar_bits) { + (ScalarType::Float, 32) => "", + (ScalarType::Float, 64) => "d", + (ScalarType::Float, _) => unimplemented!(), + (ScalarType::Unsigned | ScalarType::Int | ScalarType::Mask, _) => "i", + }; + let name = format!("__m{}{}", ty.scalar_bits * ty.len, suffix); + let ident = Ident::new(&name, Span::call_site()); + quote! { #ident } +} - pub(crate) fn expr(&self, op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { - if let Some(op_name) = translate_op(op) { - let sign_aware = matches!(op, "max" | "min"); +pub(crate) fn expr(op: &str, ty: &VecType, args: &[TokenStream]) -> TokenStream { + if let Some(op_name) = translate_op(op) { + let sign_aware = matches!(op, "max" | "min"); - let suffix = match op_name { - "and" | "or" | "xor" => coarse_type(ty), - "blendv" if ty.scalar != ScalarType::Float => "epi8", - _ => op_suffix(ty.scalar, ty.scalar_bits, sign_aware), - }; - let intrinsic = intrinsic_ident(op_name, suffix, ty.n_bits()); - quote! { #intrinsic ( #( #args ),* ) } - } else { - let suffix = op_suffix(ty.scalar, ty.scalar_bits, true); - match op { - "trunc" => { - let intrinsic = intrinsic_ident("round", suffix, ty.n_bits()); - quote! { #intrinsic ( #( #args, )* _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) } - } - "neg" => match ty.scalar { - ScalarType::Float => { - let set1 = set1_intrinsic(ty); - let xor = simple_intrinsic("xor", ty); - quote! { - #( #xor(#args, #set1(-0.0)) )* - } - } - ScalarType::Int => { - let set0 = intrinsic_ident("setzero", coarse_type(ty), ty.n_bits()); - let sub = simple_intrinsic("sub", ty); - let arg = &args[0]; - quote! { - #sub(#set0(), #arg) - } - } - _ => unreachable!(), - }, - "abs" => { + let suffix = match op_name { + "and" | "or" | "xor" => coarse_type(ty), + "blendv" if ty.scalar != ScalarType::Float => "epi8", + _ => op_suffix(ty.scalar, ty.scalar_bits, sign_aware), + }; + let intrinsic = intrinsic_ident(op_name, suffix, ty.n_bits()); + quote! { #intrinsic ( #( #args ),* ) } + } else { + let suffix = op_suffix(ty.scalar, ty.scalar_bits, true); + match op { + "trunc" => { + let intrinsic = intrinsic_ident("round", suffix, ty.n_bits()); + quote! { #intrinsic ( #( #args, )* _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) } + } + "neg" => match ty.scalar { + ScalarType::Float => { let set1 = set1_intrinsic(ty); - let andnot = simple_intrinsic("andnot", ty); + let xor = simple_intrinsic("xor", ty); quote! { - #( #andnot(#set1(-0.0), #args) )* + #( #xor(#args, #set1(-0.0)) )* } } - "copysign" => { - let a = &args[0]; - let b = &args[1]; - let set1 = set1_intrinsic(ty); - let and = simple_intrinsic("and", ty); - let andnot = simple_intrinsic("andnot", ty); - let or = simple_intrinsic("or", ty); + ScalarType::Int => { + let set0 = intrinsic_ident("setzero", coarse_type(ty), ty.n_bits()); + let sub = simple_intrinsic("sub", ty); + let arg = &args[0]; quote! { - let mask = #set1(-0.0); - #or(#and(mask, #b), #andnot(mask, #a)) + #sub(#set0(), #arg) } } - "mul" => { - let suffix = op_suffix(ty.scalar, ty.scalar_bits, false); - let intrinsic = if matches!(ty.scalar, ScalarType::Int | ScalarType::Unsigned) { - intrinsic_ident("mullo", suffix, ty.n_bits()) - } else { - intrinsic_ident("mul", suffix, ty.n_bits()) - }; - - quote! { #intrinsic ( #( #args ),* ) } + _ => unreachable!(), + }, + "abs" => { + let set1 = set1_intrinsic(ty); + let andnot = simple_intrinsic("andnot", ty); + quote! { + #( #andnot(#set1(-0.0), #args) )* } - "shrv" if ty.scalar_bits > 16 => { - let suffix = op_suffix(ty.scalar, ty.scalar_bits, false); - let name = match ty.scalar { - ScalarType::Int => "srav", - _ => "srlv", - }; - let intrinsic = intrinsic_ident(name, suffix, ty.n_bits()); - quote! { #intrinsic ( #( #args ),* ) } + } + "copysign" => { + let a = &args[0]; + let b = &args[1]; + let set1 = set1_intrinsic(ty); + let and = simple_intrinsic("and", ty); + let andnot = simple_intrinsic("andnot", ty); + let or = simple_intrinsic("or", ty); + quote! { + let mask = #set1(-0.0); + #or(#and(mask, #b), #andnot(mask, #a)) } - _ => unimplemented!("{}", op), } + "mul" => { + let suffix = op_suffix(ty.scalar, ty.scalar_bits, false); + let intrinsic = if matches!(ty.scalar, ScalarType::Int | ScalarType::Unsigned) { + intrinsic_ident("mullo", suffix, ty.n_bits()) + } else { + intrinsic_ident("mul", suffix, ty.n_bits()) + }; + + quote! { #intrinsic ( #( #args ),* ) } + } + "shrv" if ty.scalar_bits > 16 => { + let suffix = op_suffix(ty.scalar, ty.scalar_bits, false); + let name = match ty.scalar { + ScalarType::Int => "srav", + _ => "srlv", + }; + let intrinsic = intrinsic_ident(name, suffix, ty.n_bits()); + quote! { #intrinsic ( #( #args ),* ) } + } + _ => unimplemented!("{}", op), } } } diff --git a/fearless_simd_gen/src/mk_avx2.rs b/fearless_simd_gen/src/mk_avx2.rs index 26eaf22c..3ee28a67 100644 --- a/fearless_simd_gen/src/mk_avx2.rs +++ b/fearless_simd_gen/src/mk_avx2.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT use crate::arch::x86::{ - X86, cast_ident, coarse_type, extend_intrinsic, intrinsic_ident, pack_intrinsic, + self, cast_ident, coarse_type, extend_intrinsic, intrinsic_ident, pack_intrinsic, set1_intrinsic, simple_intrinsic, }; use crate::generic::{generic_combine, generic_op, generic_split, scalar_binary}; @@ -134,7 +134,7 @@ fn mk_type_impl() -> TokenStream { continue; } let simd = ty.rust(); - let arch = X86.arch_ty(ty); + let arch = x86::arch_ty(ty); result.push(quote! { impl SimdFrom<#arch, S> for #simd { #[inline(always)] diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index fc6a6aa3..3ff93dab 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ b/fearless_simd_gen/src/mk_fallback.rs @@ -6,7 +6,7 @@ reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] -use crate::arch::fallback::{self, Fallback}; +use crate::arch::fallback; use crate::generic::{generic_combine, generic_op, generic_split}; use crate::ops::{OpSig, TyFlavor, ops_for_type, reinterpret_ty, valid_reinterpret}; use crate::types::{SIMD_TYPES, ScalarType, VecType, type_imports}; @@ -144,7 +144,7 @@ fn mk_simd_impl() -> TokenStream { (0..vec_ty.len) .map(|idx| { let args = [quote! { a[#idx] }]; - let expr = Fallback.expr(method, vec_ty, &args); + let expr = fallback::expr(method, vec_ty, &args); quote! { #expr } }) .collect::>(), @@ -189,7 +189,7 @@ fn mk_simd_impl() -> TokenStream { }; let args = [quote! { a[#idx] }, quote! { #b }]; - let expr = Fallback.expr(method, vec_ty, &args); + let expr = fallback::expr(method, vec_ty, &args); quote! { #expr } }) .collect::>(), @@ -202,12 +202,12 @@ fn mk_simd_impl() -> TokenStream { } } OpSig::Shift => { - let arch_ty = Fallback.arch_ty(vec_ty); + let arch_ty = fallback::arch_ty(vec_ty); let items = make_list( (0..vec_ty.len) .map(|idx| { let args = [quote! { a[#idx] }, quote! { shift as #arch_ty }]; - let expr = Fallback.expr(method, vec_ty, &args); + let expr = fallback::expr(method, vec_ty, &args); quote! { #expr } }) .collect::>(), @@ -242,7 +242,7 @@ fn mk_simd_impl() -> TokenStream { quote! { c.into() }, ]; - let expr = Fallback.expr(method, vec_ty, &args); + let expr = fallback::expr(method, vec_ty, &args); quote! { #method_sig { #expr.simd_into(self) @@ -256,7 +256,7 @@ fn mk_simd_impl() -> TokenStream { (0..vec_ty.len) .map(|idx: usize| { let args = [quote! { &a[#idx] }, quote! { &b[#idx] }]; - let expr = Fallback.expr(method, vec_ty, &args); + let expr = fallback::expr(method, vec_ty, &args); let mask_ty = mask_type.scalar.rust(scalar_bits); quote! { -(#expr as #mask_ty) } }) diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index c86996cd..c653724a 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -14,7 +14,7 @@ use crate::arch::neon::split_intrinsic; use crate::ops::{reinterpret_ty, valid_reinterpret}; use crate::types::ScalarType; use crate::{ - arch::neon::{Neon, cvt_intrinsic, simple_intrinsic}, + arch::neon::{self, cvt_intrinsic, simple_intrinsic}, generic::{generic_combine, generic_op, generic_split}, ops::{OpSig, TyFlavor, ops_for_type}, types::{SIMD_TYPES, VecType, type_imports}, @@ -103,7 +103,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { let method = match sig { OpSig::Splat => { - let expr = Neon.expr(method, vec_ty, &[quote! { val }]); + let expr = neon::expr(method, vec_ty, &[quote! { val }]); quote! { #method_sig { unsafe { @@ -121,7 +121,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { } else { quote! { shift as #scalar } }; - let expr = Neon.expr( + let expr = neon::expr( method, vec_ty, &[quote! { a.into() }, quote! { #dup_intrinsic ( #shift ) }], @@ -137,7 +137,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { OpSig::Unary => { let args = [quote! { a.into() }]; - let expr = Neon.expr(method, vec_ty, &args); + let expr = neon::expr(method, vec_ty, &args); quote! { #method_sig { @@ -190,7 +190,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { let target_scalar_ty = target_ty.scalar.rust(target_ty.scalar_bits); if method == "narrow" { - let arch = Neon.arch_ty(vec_ty); + let arch = neon::arch_ty(vec_ty); let id1 = Ident::new(&format!("vmovn_{}", vec_scalar_ty), Span::call_site()); @@ -211,7 +211,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { } } } else { - let arch = Neon.arch_ty(&target_ty); + let arch = neon::arch_ty(&target_ty); let id1 = Ident::new(&format!("vmovl_{}", vec_scalar_ty), Span::call_site()); let id2 = @@ -258,7 +258,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { let unsigned_ty = VecType::new(ScalarType::Unsigned, vec_ty.scalar_bits, vec_ty.len); let sign_mask = - Neon.expr("splat", &unsigned_ty, &[quote! { 1 << #shift_amt }]); + neon::expr("splat", &unsigned_ty, &[quote! { 1 << #shift_amt }]); let vbsl = simple_intrinsic("vbsl", vec_ty); quote! { @@ -270,7 +270,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { } } } else { - let expr = Neon.expr(method, vec_ty, &args); + let expr = neon::expr(method, vec_ty, &args); quote! { #method_sig { unsafe { @@ -294,7 +294,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { ], }; - let mut expr = Neon.expr(method, vec_ty, &args); + let mut expr = neon::expr(method, vec_ty, &args); if method == "msub" { // -(c - a * b) = (a * b - c) let neg = simple_intrinsic("vneg", vec_ty); @@ -310,7 +310,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { } OpSig::Compare => { let args = [quote! { a.into() }, quote! { b.into() }]; - let expr = Neon.expr(method, vec_ty, &args); + let expr = neon::expr(method, vec_ty, &args); let opt_q = crate::arch::neon::opt_q(vec_ty); let reinterpret_str = format!("vreinterpret{opt_q}_s{scalar_bits}_u{scalar_bits}"); @@ -440,7 +440,7 @@ fn mk_type_impl() -> TokenStream { continue; } let simd = ty.rust(); - let arch = Neon.arch_ty(ty); + let arch = neon::arch_ty(ty); result.push(quote! { impl SimdFrom<#arch, S> for #simd { #[inline(always)] diff --git a/fearless_simd_gen/src/mk_sse4_2.rs b/fearless_simd_gen/src/mk_sse4_2.rs index 3b8aee25..e0bcd653 100644 --- a/fearless_simd_gen/src/mk_sse4_2.rs +++ b/fearless_simd_gen/src/mk_sse4_2.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT use crate::arch::x86::{ - X86, cast_ident, coarse_type, cvt_intrinsic, extend_intrinsic, intrinsic_ident, op_suffix, + self, cast_ident, coarse_type, cvt_intrinsic, extend_intrinsic, intrinsic_ident, op_suffix, pack_intrinsic, set1_intrinsic, simple_intrinsic, simple_sign_unaware_intrinsic, unpack_intrinsic, }; @@ -138,7 +138,7 @@ fn mk_type_impl() -> TokenStream { continue; } let simd = ty.rust(); - let arch = X86.arch_ty(ty); + let arch = x86::arch_ty(ty); result.push(quote! { impl SimdFrom<#arch, S> for #simd { #[inline(always)] @@ -236,7 +236,7 @@ pub(crate) fn handle_compare( let eq_intrinsic = simple_sign_unaware_intrinsic("cmpeq", vec_ty); - let max_min_expr = X86.expr(max_min, vec_ty, &args); + let max_min_expr = x86::expr(max_min, vec_ty, &args); quote! { #eq_intrinsic(#max_min_expr, a.into()) } } "simd_lt" | "simd_gt" => { @@ -276,11 +276,11 @@ pub(crate) fn handle_compare( } } } - "simd_eq" => X86.expr(method, vec_ty, &args), + "simd_eq" => x86::expr(method, vec_ty, &args), _ => unreachable!(), } } else { - let expr = X86.expr(method, vec_ty, &args); + let expr = x86::expr(method, vec_ty, &args); let ident = cast_ident( ScalarType::Float, ScalarType::Mask, @@ -315,7 +315,7 @@ pub(crate) fn handle_unary(method_sig: TokenStream, method: &str, vec_ty: &VecTy } _ => { let args = [quote! { a.into() }]; - let expr = X86.expr(method, vec_ty, &args); + let expr = x86::expr(method, vec_ty, &args); quote! { #method_sig { unsafe { #expr.simd_into(self) } @@ -417,7 +417,7 @@ pub(crate) fn handle_binary( } } else { let args = [quote! { a.into() }, quote! { b.into() }]; - let expr = X86.expr(method, vec_ty, &args); + let expr = x86::expr(method, vec_ty, &args); quote! { #method_sig { unsafe { #expr.simd_into(self) } @@ -515,7 +515,7 @@ pub(crate) fn handle_ternary( quote! { c.into() }, ]; - let expr = X86.expr(method, vec_ty, &args); + let expr = x86::expr(method, vec_ty, &args); quote! { #method_sig { #expr.simd_into(self) @@ -543,7 +543,7 @@ pub(crate) fn handle_select(method_sig: TokenStream, vec_ty: &VecType) -> TokenS _ => quote! { a.into() }, }, ]; - let expr = X86.expr("select", vec_ty, &args); + let expr = x86::expr("select", vec_ty, &args); quote! { #method_sig { diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index 2b84c571..145691f5 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -13,7 +13,7 @@ use quote::{format_ident, quote}; use crate::generic::scalar_binary; use crate::ops::valid_reinterpret; use crate::{ - arch::wasm::Wasm, + arch::wasm, generic::{generic_combine, generic_op, generic_split}, ops::{OpSig, TyFlavor, ops_for_type}, types::{SIMD_TYPES, ScalarType, type_imports}, @@ -66,7 +66,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { }; let m = match sig { OpSig::Splat => { - let expr = Wasm.expr(method, vec_ty, &[quote! { val }]); + let expr = wasm::expr(method, vec_ty, &[quote! { val }]); quote! { #method_sig { #expr.simd_into(self) @@ -86,7 +86,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { a.sub(a.trunc()) } } else { - let expr = Wasm.expr(method, vec_ty, &args); + let expr = wasm::expr(method, vec_ty, &args); quote! { #expr.simd_into(self) } }; @@ -145,7 +145,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { // that `max(NaN, x)` and `min(NaN, x)` result in `x`. This matches // `_mm_max_ps` and `_mm_min_ps` semantics on x86. let swapped_args = [quote! { b.into() }, quote! { a.into() }]; - let expr: TokenStream = Wasm.expr(method, vec_ty, &swapped_args); + let expr: TokenStream = wasm::expr(method, vec_ty, &swapped_args); quote! { #method_sig { #expr.simd_into(self) @@ -153,7 +153,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { } } _ => { - let expr = Wasm.expr(method, vec_ty, &args); + let expr = wasm::expr(method, vec_ty, &args); quote! { #method_sig { #expr.simd_into(self) @@ -182,7 +182,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { } OpSig::Compare => { let args = [quote! { a.into() }, quote! { b.into() }]; - let expr = Wasm.expr(method, vec_ty, &args); + let expr = wasm::expr(method, vec_ty, &args); quote! { #method_sig { #expr.simd_into(self) From 3ab0b302ef46e6f0c1c3d2434c694e02506ee552 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Thu, 13 Nov 2025 19:48:36 -0500 Subject: [PATCH 6/6] Fix some unreachable_pub lints --- fearless_simd_gen/src/arch/fallback.rs | 3 +-- fearless_simd_gen/src/arch/neon.rs | 13 ++++--------- fearless_simd_gen/src/generic.rs | 13 ++++--------- fearless_simd_gen/src/mk_fallback.rs | 9 ++------- fearless_simd_gen/src/mk_neon.rs | 5 ++--- fearless_simd_gen/src/mk_ops.rs | 7 +------ fearless_simd_gen/src/mk_simd_trait.rs | 7 +------ fearless_simd_gen/src/mk_simd_types.rs | 7 +------ fearless_simd_gen/src/mk_wasm.rs | 5 ++--- 9 files changed, 18 insertions(+), 51 deletions(-) diff --git a/fearless_simd_gen/src/arch/fallback.rs b/fearless_simd_gen/src/arch/fallback.rs index 62ebaa47..52a275e9 100644 --- a/fearless_simd_gen/src/arch/fallback.rs +++ b/fearless_simd_gen/src/arch/fallback.rs @@ -3,7 +3,6 @@ #![expect( clippy::match_single_binding, - unreachable_pub, reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] @@ -63,7 +62,7 @@ pub(crate) fn translate_op(op: &str, is_float: bool) -> Option<&'static str> { }) } -pub fn simple_intrinsic(name: &str, ty: &VecType) -> TokenStream { +pub(crate) fn simple_intrinsic(name: &str, ty: &VecType) -> TokenStream { let ty_prefix = arch_ty(ty); let ident = Ident::new(name, Span::call_site()); diff --git a/fearless_simd_gen/src/arch/neon.rs b/fearless_simd_gen/src/arch/neon.rs index 6257a5a4..7d6ccbd9 100644 --- a/fearless_simd_gen/src/arch/neon.rs +++ b/fearless_simd_gen/src/arch/neon.rs @@ -1,11 +1,6 @@ // Copyright 2025 the Fearless_SIMD Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -#![expect( - unreachable_pub, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] - use crate::types::{ScalarType, VecType}; use proc_macro2::{Ident, Span, TokenStream}; use quote::quote; @@ -101,7 +96,7 @@ fn neon_array_type(ty: &VecType) -> (&'static str, &'static str, usize) { (opt_q(ty), scalar_c, ty.scalar_bits) } -pub fn opt_q(ty: &VecType) -> &'static str { +pub(crate) fn opt_q(ty: &VecType) -> &'static str { match ty.n_bits() { 64 => "", 128 => "q", @@ -109,7 +104,7 @@ pub fn opt_q(ty: &VecType) -> &'static str { } } -pub fn simple_intrinsic(name: &str, ty: &VecType) -> Ident { +pub(crate) fn simple_intrinsic(name: &str, ty: &VecType) -> Ident { let (opt_q, scalar_c, size) = neon_array_type(ty); Ident::new( &format!("{name}{opt_q}_{scalar_c}{size}"), @@ -117,7 +112,7 @@ pub fn simple_intrinsic(name: &str, ty: &VecType) -> Ident { ) } -pub fn split_intrinsic(name: &str, name2: &str, ty: &VecType) -> Ident { +pub(crate) fn split_intrinsic(name: &str, name2: &str, ty: &VecType) -> Ident { let (opt_q, scalar_c, size) = neon_array_type(ty); Ident::new( &format!("{name}{opt_q}_{name2}_{scalar_c}{size}"), @@ -125,7 +120,7 @@ pub fn split_intrinsic(name: &str, name2: &str, ty: &VecType) -> Ident { ) } -pub fn cvt_intrinsic(name: &str, to_ty: &VecType, from_ty: &VecType) -> Ident { +pub(crate) fn cvt_intrinsic(name: &str, to_ty: &VecType, from_ty: &VecType) -> Ident { let (opt_q, from_scalar_c, from_size) = neon_array_type(from_ty); let (_opt_q, to_scalar_c, to_size) = neon_array_type(to_ty); Ident::new( diff --git a/fearless_simd_gen/src/generic.rs b/fearless_simd_gen/src/generic.rs index 82bd0989..eab607dc 100644 --- a/fearless_simd_gen/src/generic.rs +++ b/fearless_simd_gen/src/generic.rs @@ -1,11 +1,6 @@ // Copyright 2025 the Fearless_SIMD Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -#![expect( - unreachable_pub, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] - use proc_macro2::{Ident, Span, TokenStream}; use quote::{format_ident, quote}; @@ -16,7 +11,7 @@ use crate::{ }; /// Implementation of combine based on `copy_from_slice` -pub fn generic_combine(ty: &VecType) -> TokenStream { +pub(crate) fn generic_combine(ty: &VecType) -> TokenStream { let ty_rust = ty.rust(); let n = ty.len; let n2 = n * 2; @@ -38,7 +33,7 @@ pub fn generic_combine(ty: &VecType) -> TokenStream { } /// Implementation of split based on `copy_from_slice` -pub fn generic_split(ty: &VecType) -> TokenStream { +pub(crate) fn generic_split(ty: &VecType) -> TokenStream { let ty_rust = ty.rust(); let n = ty.len; let nhalf = n / 2; @@ -63,7 +58,7 @@ pub fn generic_split(ty: &VecType) -> TokenStream { /// Implementation based on split/combine /// /// Only suitable for lane-wise and block-wise operations -pub fn generic_op(op: &str, sig: OpSig, ty: &VecType) -> TokenStream { +pub(crate) fn generic_op(op: &str, sig: OpSig, ty: &VecType) -> TokenStream { let ty_rust = ty.rust(); let name = Ident::new(&format!("{op}_{}", ty.rust_name()), Span::call_site()); let split = Ident::new(&format!("split_{}", ty.rust_name()), Span::call_site()); @@ -271,7 +266,7 @@ pub fn generic_op(op: &str, sig: OpSig, ty: &VecType) -> TokenStream { } } -pub fn scalar_binary(name: &Ident, f: TokenStream, ty: &VecType) -> TokenStream { +pub(crate) fn scalar_binary(name: &Ident, f: TokenStream, ty: &VecType) -> TokenStream { let ty_rust = ty.rust(); quote! { #[inline(always)] diff --git a/fearless_simd_gen/src/mk_fallback.rs b/fearless_simd_gen/src/mk_fallback.rs index 3ff93dab..671ad82e 100644 --- a/fearless_simd_gen/src/mk_fallback.rs +++ b/fearless_simd_gen/src/mk_fallback.rs @@ -1,11 +1,6 @@ // Copyright 2025 the Fearless_SIMD Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -#![expect( - unreachable_pub, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] - use crate::arch::fallback; use crate::generic::{generic_combine, generic_op, generic_split}; use crate::ops::{OpSig, TyFlavor, ops_for_type, reinterpret_ty, valid_reinterpret}; @@ -14,7 +9,7 @@ use proc_macro2::{Ident, Span, TokenStream}; use quote::quote; #[derive(Clone, Copy)] -pub struct Level; +pub(crate) struct Level; impl Level { fn name(self) -> &'static str { @@ -27,7 +22,7 @@ impl Level { } } -pub fn mk_fallback_impl() -> TokenStream { +pub(crate) fn mk_fallback_impl() -> TokenStream { let imports = type_imports(); let simd_impl = mk_simd_impl(); diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index c653724a..693ae9fc 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -3,7 +3,6 @@ #![expect( clippy::uninlined_format_args, - unreachable_pub, reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] @@ -21,7 +20,7 @@ use crate::{ }; #[derive(Clone, Copy)] -pub enum Level { +pub(crate) enum Level { Neon, // TODO: Fp16, } @@ -39,7 +38,7 @@ impl Level { } } -pub fn mk_neon_impl(level: Level) -> TokenStream { +pub(crate) fn mk_neon_impl(level: Level) -> TokenStream { let imports = type_imports(); let simd_impl = mk_simd_impl(level); let ty_impl = mk_type_impl(); diff --git a/fearless_simd_gen/src/mk_ops.rs b/fearless_simd_gen/src/mk_ops.rs index 5ed974ae..bde52646 100644 --- a/fearless_simd_gen/src/mk_ops.rs +++ b/fearless_simd_gen/src/mk_ops.rs @@ -1,17 +1,12 @@ // Copyright 2025 the Fearless_SIMD Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -#![expect( - unreachable_pub, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] - use proc_macro2::{Ident, Span, TokenStream}; use quote::{format_ident, quote}; use crate::types::{SIMD_TYPES, ScalarType, type_imports}; -pub fn mk_ops() -> TokenStream { +pub(crate) fn mk_ops() -> TokenStream { let imports = type_imports(); let mut impls = vec![]; diff --git a/fearless_simd_gen/src/mk_simd_trait.rs b/fearless_simd_gen/src/mk_simd_trait.rs index 5c47410a..6b9fffb4 100644 --- a/fearless_simd_gen/src/mk_simd_trait.rs +++ b/fearless_simd_gen/src/mk_simd_trait.rs @@ -1,11 +1,6 @@ // Copyright 2025 the Fearless_SIMD Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -#![expect( - unreachable_pub, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] - use proc_macro2::{Ident, Span, TokenStream}; use quote::quote; @@ -14,7 +9,7 @@ use crate::{ types::{SIMD_TYPES, type_imports}, }; -pub fn mk_simd_trait() -> TokenStream { +pub(crate) fn mk_simd_trait() -> TokenStream { let imports = type_imports(); let mut methods = vec![]; // Float methods diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index ee71e374..375e4e8a 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -1,11 +1,6 @@ // Copyright 2025 the Fearless_SIMD Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -#![expect( - unreachable_pub, - reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" -)] - use proc_macro2::{Ident, Literal, Span, TokenStream}; use quote::{format_ident, quote}; @@ -14,7 +9,7 @@ use crate::{ types::{SIMD_TYPES, ScalarType, VecType}, }; -pub fn mk_simd_types() -> TokenStream { +pub(crate) fn mk_simd_types() -> TokenStream { let mut result = quote! { use crate::{Bytes, Select, Simd, SimdFrom, SimdInto, SimdCvtFloat, SimdCvtTruncate}; }; diff --git a/fearless_simd_gen/src/mk_wasm.rs b/fearless_simd_gen/src/mk_wasm.rs index 145691f5..56305658 100644 --- a/fearless_simd_gen/src/mk_wasm.rs +++ b/fearless_simd_gen/src/mk_wasm.rs @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT #![expect( - unreachable_pub, clippy::missing_assert_message, reason = "TODO: https://github.com/linebender/fearless_simd/issues/40" )] @@ -20,7 +19,7 @@ use crate::{ }; #[derive(Clone, Copy)] -pub enum Level { +pub(crate) enum Level { WasmSimd128, } @@ -550,7 +549,7 @@ fn mk_simd_impl(level: Level) -> TokenStream { } } -pub fn mk_wasm128_impl(level: Level) -> TokenStream { +pub(crate) fn mk_wasm128_impl(level: Level) -> TokenStream { let imports = type_imports(); let simd_impl = mk_simd_impl(level); let ty_impl = mk_type_impl();