From ae74c67b8c44ebb2264fd6f70bad7f971a616785 Mon Sep 17 00:00:00 2001 From: LaoLittle Date: Sat, 28 Feb 2026 12:23:31 -0600 Subject: [PATCH 1/6] fix: aarch64 lowering iadd without checking input type --- cranelift/codegen/src/isa/aarch64/lower.isle | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index e76bb65a3b66..c1c6d2995c4c 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -51,55 +51,55 @@ ;; `i64` and smaller ;; Base case, simply adding things in registers. -(rule iadd_base_case -1 (lower (has_type (fits_in_64 ty) (iadd x y))) +(rule iadd_base_case -1 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x y))) (add ty x y)) ;; Special cases for when one operand is an immediate that fits in 12 bits. -(rule iadd_imm12_right 4 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y)))) +(rule iadd_imm12_right 4 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x (imm12_from_value y)))) (add_imm ty x y)) -(rule iadd_imm12_left 5 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y))) +(rule iadd_imm12_left 5 (lower (has_type (fits_in_64 (ty_int ty)) (iadd (imm12_from_value x) y))) (add_imm ty y x)) ;; Same as the previous special cases, except we can switch the addition to a ;; subtraction if the negated immediate fits in 12 bits. 
-(rule iadd_imm12_neg_right 2 (lower (has_type (fits_in_64 ty) (iadd x y))) +(rule iadd_imm12_neg_right 2 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x y))) (if-let imm12_neg (imm12_from_negated_value y)) (sub_imm ty x imm12_neg)) -(rule iadd_imm12_neg_left 3 (lower (has_type (fits_in_64 ty) (iadd x y))) +(rule iadd_imm12_neg_left 3 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x y))) (if-let imm12_neg (imm12_from_negated_value x)) (sub_imm ty y imm12_neg)) ;; Special cases for when we're adding an extended register where the extending ;; operation can get folded into the add itself. -(rule iadd_extend_right 0 (lower (has_type (fits_in_64 ty) (iadd x (extended_value_from_value y)))) +(rule iadd_extend_right 0 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x (extended_value_from_value y)))) (add_extend ty x y)) -(rule iadd_extend_left 1 (lower (has_type (fits_in_64 ty) (iadd (extended_value_from_value x) y))) +(rule iadd_extend_left 1 (lower (has_type (fits_in_64 (ty_int ty)) (iadd (extended_value_from_value x) y))) (add_extend ty y x)) ;; Special cases for when we're adding the shift of a different ;; register by a constant amount and the shift can get folded into the add. -(rule iadd_ishl_right 7 (lower (has_type (fits_in_64 ty) +(rule iadd_ishl_right 7 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x (ishl y (iconst k))))) (if-let amt (lshl_from_imm64 ty k)) (add_shift ty x y amt)) -(rule iadd_ishl_left 6 (lower (has_type (fits_in_64 ty) +(rule iadd_ishl_left 6 (lower (has_type (fits_in_64 (ty_int ty)) (iadd (ishl x (iconst k)) y))) (if-let amt (lshl_from_imm64 ty k)) (add_shift ty y x amt)) ;; Fold an `iadd` and `imul` combination into a `madd` instruction. 
-(rule iadd_imul_right 7 (lower (has_type (fits_in_64 ty) (iadd x (imul y z)))) +(rule iadd_imul_right 7 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x (imul y z)))) (madd ty y z x)) -(rule iadd_imul_left 6 (lower (has_type (fits_in_64 ty) (iadd (imul x y) z))) +(rule iadd_imul_left 6 (lower (has_type (fits_in_64 (ty_int ty)) (iadd (imul x y) z))) (madd ty x y z)) ;; Fold an `isub` and `imul` combination into a `msub` instruction. -(rule isub_imul (lower (has_type (fits_in_64 ty) (isub x (imul y z)))) +(rule isub_imul (lower (has_type (fits_in_64 (ty_int ty)) (isub x (imul y z)))) (msub ty y z x)) ;; vectors From 9d92ad3c981699c6bf57440f69d6e0d3cf111beb Mon Sep 17 00:00:00 2001 From: LaoLittle Date: Sat, 28 Feb 2026 12:54:19 -0600 Subject: [PATCH 2/6] add regression test for aarch64 bitcast --- .../filetests/filetests/runtests/issue-12696.clif | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 cranelift/filetests/filetests/runtests/issue-12696.clif diff --git a/cranelift/filetests/filetests/runtests/issue-12696.clif b/cranelift/filetests/filetests/runtests/issue-12696.clif new file mode 100644 index 000000000000..06ff3d7388e0 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/issue-12696.clif @@ -0,0 +1,14 @@ +test run +target aarch64 + +function %bitcast_neon_repro() -> i64x2 { +block0: + v0 = iconst.i64 0x0001_0001_0001_0001 + v1 = bitcast.i16x4 little v0 + v2 = iadd v1, v1 ; it compiles + v3 = bitcast.i64 little v2 + v4 = scalar_to_vector.i64x2 v3 + return v4 +} + +; run: %bitcast_neon_repro() == [0x0002000200020002 0] \ No newline at end of file From 737b712d081b54a87ec48b9496ec528dc34d7a86 Mon Sep 17 00:00:00 2001 From: LaoLittle Date: Sat, 28 Feb 2026 13:49:16 -0600 Subject: [PATCH 3/6] use a more straightforward name for the test --- cranelift/filetests/filetests/runtests/issue-12696.clif | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cranelift/filetests/filetests/runtests/issue-12696.clif 
b/cranelift/filetests/filetests/runtests/issue-12696.clif index 06ff3d7388e0..abae4c4ad9f0 100644 --- a/cranelift/filetests/filetests/runtests/issue-12696.clif +++ b/cranelift/filetests/filetests/runtests/issue-12696.clif @@ -1,7 +1,7 @@ test run target aarch64 -function %bitcast_neon_repro() -> i64x2 { +function %iadd_repro() -> i64x2 { block0: v0 = iconst.i64 0x0001_0001_0001_0001 v1 = bitcast.i16x4 little v0 @@ -11,4 +11,4 @@ block0: return v4 } -; run: %bitcast_neon_repro() == [0x0002000200020002 0] \ No newline at end of file +; run: %iadd_repro() == [0x0002000200020002 0] \ No newline at end of file From ad9e3b6123ccd855891ef11a657205e7767b9319 Mon Sep 17 00:00:00 2001 From: LaoLittle Date: Sat, 28 Feb 2026 14:32:03 -0600 Subject: [PATCH 4/6] add tests for isub and imul --- .../filetests/runtests/issue-12696.clif | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/cranelift/filetests/filetests/runtests/issue-12696.clif b/cranelift/filetests/filetests/runtests/issue-12696.clif index abae4c4ad9f0..8e3d302572c7 100644 --- a/cranelift/filetests/filetests/runtests/issue-12696.clif +++ b/cranelift/filetests/filetests/runtests/issue-12696.clif @@ -11,4 +11,28 @@ block0: return v4 } -; run: %iadd_repro() == [0x0002000200020002 0] \ No newline at end of file +; run: %iadd_repro() == [0x0002000200020002 0] + +function %isub_repro() -> i64x2 { +block0: + v0 = iconst.i64 0x0001_0001_0001_0001 + v1 = bitcast.i16x4 little v0 + v2 = isub v1, v1 ; it compiles + v3 = bitcast.i64 little v2 + v4 = scalar_to_vector.i64x2 v3 + return v4 +} + +; run: %isub_repro() == [0 0] + +function %imul_repro() -> i64x2 { +block0: + v0 = iconst.i64 0x0002_0002_0002_0002 + v1 = bitcast.i16x4 little v0 + v2 = imul v1, v1 ; it compiles + v3 = bitcast.i64 little v2 + v4 = scalar_to_vector.i64x2 v3 + return v4 +} + +; run: %imul_repro() == [0x0004000400040004 0] \ No newline at end of file From 1b2a4f8e0d26fc77f85d7bcbefdece90aa9b5902 Mon Sep 17 00:00:00 2001 From: 
LaoLittle Date: Sat, 28 Feb 2026 14:32:20 -0600 Subject: [PATCH 5/6] fix: isub/imul now correctly handle vector types --- cranelift/codegen/src/isa/aarch64/lower.isle | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index c1c6d2995c4c..614d568079f4 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -720,27 +720,27 @@ ;; `i64` and smaller ;; Base case, simply subtracting things in registers. -(rule isub_base_case -4 (lower (has_type (fits_in_64 ty) (isub x y))) +(rule isub_base_case -4 (lower (has_type (fits_in_64 (ty_int ty)) (isub x y))) (sub ty x y)) ;; Special case for when one operand is an immediate that fits in 12 bits. -(rule isub_imm12 0 (lower (has_type (fits_in_64 ty) (isub x (imm12_from_value y)))) +(rule isub_imm12 0 (lower (has_type (fits_in_64 (ty_int ty)) (isub x (imm12_from_value y)))) (sub_imm ty x y)) ;; Same as the previous special case, except we can switch the subtraction to an ;; addition if the negated immediate fits in 12 bits. -(rule isub_imm12_neg 2 (lower (has_type (fits_in_64 ty) (isub x y))) +(rule isub_imm12_neg 2 (lower (has_type (fits_in_64 (ty_int ty)) (isub x y))) (if-let imm12_neg (imm12_from_negated_value y)) (add_imm ty x imm12_neg)) ;; Special cases for when we're subtracting an extended register where the ;; extending operation can get folded into the sub itself. -(rule isub_extend 1 (lower (has_type (fits_in_64 ty) (isub x (extended_value_from_value y)))) +(rule isub_extend 1 (lower (has_type (fits_in_64 (ty_int ty)) (isub x (extended_value_from_value y)))) (sub_extend ty x y)) ;; Finally a special case for when we're subtracting the shift of a different ;; register by a constant amount and the shift can get folded into the sub. 
-(rule isub_ishl -3 (lower (has_type (fits_in_64 ty) +(rule isub_ishl -3 (lower (has_type (fits_in_64 (ty_int ty)) (isub x (ishl y (iconst k))))) (if-let amt (lshl_from_imm64 ty k)) (sub_shift ty x y amt)) @@ -790,7 +790,7 @@ ;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. -(rule imul_base_case -3 (lower (has_type (fits_in_64 ty) (imul x y))) +(rule imul_base_case -3 (lower (has_type (fits_in_64 (ty_int ty)) (imul x y))) (madd ty x y (zero_reg))) ;; `i128`. @@ -841,8 +841,8 @@ (madd $I64 x y (zero_reg)) (smulh $I64 x y)))) -;; Case for i8x16, i16x8, and i32x4. -(rule -2 (lower (has_type (ty_vec128 ty @ (not_i64x2)) (imul x y))) +;; vectors (i8x8/i8x16/i16x4/i16x8/i32x2/i32x4) +(rule -2 (lower (has_type (lane_fits_in_32 ty @ (multi_lane _ _)) (imul x y))) (mul x y (vector_size ty))) ;; Special lowering for i64x2. From b8dc3cabaa48e60a38c9faa8ec523e1409bd4040 Mon Sep 17 00:00:00 2001 From: LaoLittle Date: Sat, 28 Feb 2026 15:26:35 -0600 Subject: [PATCH 6/6] fix: missing checks in ty_int_ref_scalar_64, and we use it in place of fits_in_64 + ty_int --- cranelift/codegen/src/isa/aarch64/lower.isle | 36 ++++++++++---------- cranelift/codegen/src/isle_prelude.rs | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 614d568079f4..dbd8b27fa098 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -51,55 +51,55 @@ ;; `i64` and smaller ;; Base case, simply adding things in registers. -(rule iadd_base_case -1 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x y))) +(rule iadd_base_case -1 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x y))) (add ty x y)) ;; Special cases for when one operand is an immediate that fits in 12 bits. 
-(rule iadd_imm12_right 4 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x (imm12_from_value y)))) +(rule iadd_imm12_right 4 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (imm12_from_value y)))) (add_imm ty x y)) -(rule iadd_imm12_left 5 (lower (has_type (fits_in_64 (ty_int ty)) (iadd (imm12_from_value x) y))) +(rule iadd_imm12_left 5 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (imm12_from_value x) y))) (add_imm ty y x)) ;; Same as the previous special cases, except we can switch the addition to a ;; subtraction if the negated immediate fits in 12 bits. -(rule iadd_imm12_neg_right 2 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x y))) +(rule iadd_imm12_neg_right 2 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x y))) (if-let imm12_neg (imm12_from_negated_value y)) (sub_imm ty x imm12_neg)) -(rule iadd_imm12_neg_left 3 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x y))) +(rule iadd_imm12_neg_left 3 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x y))) (if-let imm12_neg (imm12_from_negated_value x)) (sub_imm ty y imm12_neg)) ;; Special cases for when we're adding an extended register where the extending ;; operation can get folded into the add itself. -(rule iadd_extend_right 0 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x (extended_value_from_value y)))) +(rule iadd_extend_right 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (extended_value_from_value y)))) (add_extend ty x y)) -(rule iadd_extend_left 1 (lower (has_type (fits_in_64 (ty_int ty)) (iadd (extended_value_from_value x) y))) +(rule iadd_extend_left 1 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (extended_value_from_value x) y))) (add_extend ty y x)) ;; Special cases for when we're adding the shift of a different ;; register by a constant amount and the shift can get folded into the add. 
-(rule iadd_ishl_right 7 (lower (has_type (fits_in_64 (ty_int ty)) +(rule iadd_ishl_right 7 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (ishl y (iconst k))))) (if-let amt (lshl_from_imm64 ty k)) (add_shift ty x y amt)) -(rule iadd_ishl_left 6 (lower (has_type (fits_in_64 (ty_int ty)) +(rule iadd_ishl_left 6 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (ishl x (iconst k)) y))) (if-let amt (lshl_from_imm64 ty k)) (add_shift ty y x amt)) ;; Fold an `iadd` and `imul` combination into a `madd` instruction. -(rule iadd_imul_right 7 (lower (has_type (fits_in_64 (ty_int ty)) (iadd x (imul y z)))) +(rule iadd_imul_right 7 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (imul y z)))) (madd ty y z x)) -(rule iadd_imul_left 6 (lower (has_type (fits_in_64 (ty_int ty)) (iadd (imul x y) z))) +(rule iadd_imul_left 6 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (imul x y) z))) (madd ty x y z)) ;; Fold an `isub` and `imul` combination into a `msub` instruction. -(rule isub_imul (lower (has_type (fits_in_64 (ty_int ty)) (isub x (imul y z)))) +(rule isub_imul (lower (has_type (ty_int_ref_scalar_64 ty) (isub x (imul y z)))) (msub ty y z x)) ;; vectors @@ -720,27 +720,27 @@ ;; `i64` and smaller ;; Base case, simply subtracting things in registers. -(rule isub_base_case -4 (lower (has_type (fits_in_64 (ty_int ty)) (isub x y))) +(rule isub_base_case -4 (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y))) (sub ty x y)) ;; Special case for when one operand is an immediate that fits in 12 bits. -(rule isub_imm12 0 (lower (has_type (fits_in_64 (ty_int ty)) (isub x (imm12_from_value y)))) +(rule isub_imm12 0 (lower (has_type (ty_int_ref_scalar_64 ty) (isub x (imm12_from_value y)))) (sub_imm ty x y)) ;; Same as the previous special case, except we can switch the subtraction to an ;; addition if the negated immediate fits in 12 bits. 
-(rule isub_imm12_neg 2 (lower (has_type (fits_in_64 (ty_int ty)) (isub x y))) +(rule isub_imm12_neg 2 (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y))) (if-let imm12_neg (imm12_from_negated_value y)) (add_imm ty x imm12_neg)) ;; Special cases for when we're subtracting an extended register where the ;; extending operation can get folded into the sub itself. -(rule isub_extend 1 (lower (has_type (fits_in_64 (ty_int ty)) (isub x (extended_value_from_value y)))) +(rule isub_extend 1 (lower (has_type (ty_int_ref_scalar_64 ty) (isub x (extended_value_from_value y)))) (sub_extend ty x y)) ;; Finally a special case for when we're subtracting the shift of a different ;; register by a constant amount and the shift can get folded into the sub. -(rule isub_ishl -3 (lower (has_type (fits_in_64 (ty_int ty)) +(rule isub_ishl -3 (lower (has_type (ty_int_ref_scalar_64 ty) (isub x (ishl y (iconst k))))) (if-let amt (lshl_from_imm64 ty k)) (sub_shift ty x y amt)) @@ -790,7 +790,7 @@ ;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. -(rule imul_base_case -3 (lower (has_type (fits_in_64 (ty_int ty)) (imul x y))) +(rule imul_base_case -3 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y))) (madd ty x y (zero_reg))) ;; `i128`. diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index a2b58b15431d..c10446b75b40 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -280,7 +280,7 @@ macro_rules! isle_common_prelude_methods { #[inline] fn ty_int_ref_scalar_64(&mut self, ty: Type) -> Option { - if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() { + if ty.bits() <= 64 && !ty.is_float() && !ty.is_vector() && !ty.is_dynamic_vector() { Some(ty) } else { None