alexcrichton
diff --git a/‎cranelift/codegen/meta/src/shared/instructions.rs‎
Lines changed: 0 additions & 36 deletions b/‎cranelift/codegen/meta/src/shared/instructions.rs‎
Lines changed: 0 additions & 36 deletions
diff --git a/‎cranelift/codegen/src/isa/aarch64/lower.isle‎
Lines changed: 0 additions & 18 deletions b/‎cranelift/codegen/src/isa/aarch64/lower.isle‎
Lines changed: 0 additions & 18 deletions
diff --git a/‎cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle‎
Lines changed: 0 additions & 12 deletions b/‎cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle‎
Lines changed: 0 additions & 12 deletions
diff --git a/‎cranelift/codegen/src/isa/riscv64/inst.isle‎
Lines changed: 0 additions & 17 deletions b/‎cranelift/codegen/src/isa/riscv64/inst.isle‎
Lines changed: 0 additions & 17 deletions
diff --git a/‎cranelift/codegen/src/isa/riscv64/inst/emit.rs‎
Lines changed: 0 additions & 48 deletions b/‎cranelift/codegen/src/isa/riscv64/inst/emit.rs‎
Lines changed: 0 additions & 48 deletions
diff --git a/‎cranelift/codegen/src/isa/riscv64/inst/mod.rs‎
Lines changed: 0 additions & 30 deletions b/‎cranelift/codegen/src/isa/riscv64/inst/mod.rs‎
Lines changed: 0 additions & 30 deletions
diff --git a/‎cranelift/codegen/src/isa/riscv64/lower.isle‎
Lines changed: 0 additions & 18 deletions b/‎cranelift/codegen/src/isa/riscv64/lower.isle‎
Lines changed: 0 additions & 18 deletions
diff --git a/‎cranelift/codegen/src/isa/s390x/lower.isle‎
Lines changed: 13 additions & 14 deletions b/‎cranelift/codegen/src/isa/s390x/lower.isle‎
Lines changed: 13 additions & 14 deletions
diff --git a/‎cranelift/codegen/src/isa/x64/lower.isle‎
Lines changed: 27 additions & 22 deletions b/‎cranelift/codegen/src/isa/x64/lower.isle‎
Lines changed: 27 additions & 22 deletions
diff --git a/‎cranelift/codegen/src/opts/selects.isle‎
Lines changed: 0 additions & 1 deletion b/‎cranelift/codegen/src/opts/selects.isle‎
Lines changed: 0 additions & 1 deletion
@@ -2931,24 +2931,6 @@ pub(crate) fn define(
         ]),
     );
 
-    ig.push(
-        Inst::new(
-            "fmin_pseudo",
-            r#"
-        Floating point pseudo-minimum, propagating NaNs.  This behaves differently from ``fmin``.
-        See <https://github.com/WebAssembly/simd/pull/122> for background.
-
-        The behaviour is defined as ``fmin_pseudo(a, b) = (b < a) ? b : a``, and the behaviour
-        for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![Operand::new("x", Float), Operand::new("y", Float)])
-        .operands_out(vec![
-            Operand::new("a", Float).with_doc("The smaller of ``x`` and ``y``")
-        ]),
-    );
-
     ig.push(
         Inst::new(
             "fmax",
@@ -2968,24 +2950,6 @@ pub(crate) fn define(
         ]),
     );
 
-    ig.push(
-        Inst::new(
-            "fmax_pseudo",
-            r#"
-        Floating point pseudo-maximum, propagating NaNs.  This behaves differently from ``fmax``.
-        See <https://github.com/WebAssembly/simd/pull/122> for background.
-
-        The behaviour is defined as ``fmax_pseudo(a, b) = (a < b) ? b : a``, and the behaviour
-        for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![Operand::new("x", Float), Operand::new("y", Float)])
-        .operands_out(vec![
-            Operand::new("a", Float).with_doc("The larger of ``x`` and ``y``")
-        ]),
-    );
-
     ig.push(
         Inst::new(
             "ceil",
 
@@ -415,24 +415,6 @@
 (rule (lower (has_type (ty_scalar_float ty) (fmax rn rm)))
       (fpu_rrr (FPUOp2.Max) rn rm (scalar_size ty)))
 
-;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmin_pseudo rm rn)))
-      (bsl ty (vec_rrr (VecALUOp.Fcmgt) rm rn (vector_size ty)) rn rm))
-
-(rule (lower (has_type (ty_scalar_float ty) (fmin_pseudo rm rn)))
-      (with_flags (fpu_cmp (scalar_size ty) rm rn)
-                  (fpu_csel ty (Cond.Gt) rn rm)))
-
-;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule -1 (lower (has_type ty @ (multi_lane _ _) (fmax_pseudo rm rn)))
-      (bsl ty (vec_rrr (VecALUOp.Fcmgt) rn rm (vector_size ty)) rn rm))
-
-(rule (lower (has_type (ty_scalar_float ty) (fmax_pseudo rm rn)))
-      (with_flags (fpu_cmp (scalar_size ty) rn rm)
-                  (fpu_csel ty (Cond.Gt) rn rm)))
-
 ;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule -1 (lower (has_type ty @ (multi_lane _ _) (sqrt x)))
 
@@ -35,18 +35,6 @@
 (rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fmax x y)))
       (value_reg (vec_rrr (VecALUOp.Fmax) (put_in_reg x) (put_in_reg y) (vector_size ty))))
 
-;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fmin_pseudo x y)))
-      (value_reg (bsl ty
-                  (vec_rrr (VecALUOp.Fcmgt) (put_in_reg x) (put_in_reg y)
-                   (vector_size ty)) (put_in_reg y) (put_in_reg x))))
-
-;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(rule -2 (lower (has_type ty @ (dynamic_lane _ _) (fmax_pseudo x y)))
-      (value_reg (bsl ty
-                  (vec_rrr (VecALUOp.Fcmgt) (put_in_reg y) (put_in_reg x)
-                   (vector_size ty)) (put_in_reg y) (put_in_reg x))))
-
 ;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 (rule -2 (lower (has_type (ty_dyn128_int ty) (snarrow x y)))
       (if-let _ (zero_value y))
 
@@ -288,14 +288,6 @@
       (rs1 Reg)
       (rs2 Reg)
       (ty Type))
-    (FloatSelectPseudo
-      (op FloatSelectOP)
-      (rd WritableReg)
-      ;; a integer register
-      (tmp WritableReg)
-      (rs1 Reg)
-      (rs2 Reg)
-      (ty Type))
 
     ;; popcnt  if target doesn't support extension B
     ;; use iteration to implement.
@@ -986,15 +978,6 @@
       (_ Unit (emit (MInst.FloatRound op rd tmp tmp2 rs ty))))
     (writable_reg_to_reg rd)))
 
-(decl gen_float_select_pseudo (FloatSelectOP Reg Reg Type) Reg)
-(rule
-  (gen_float_select_pseudo op x y ty)
-  (let
-    ((rd WritableReg (temp_writable_reg ty))
-      (tmp WritableXReg (temp_writable_xreg))
-      (_ Unit (emit (MInst.FloatSelectPseudo op rd tmp x y ty))))
-    (writable_reg_to_reg rd)))
-
 (decl gen_float_select (FloatSelectOP Reg Reg Type) Reg)
 (rule
   (gen_float_select op x y ty)
 
@@ -458,7 +458,6 @@ impl Inst {
             | Inst::DummyUse { .. }
             | Inst::FloatRound { .. }
             | Inst::FloatSelect { .. }
-            | Inst::FloatSelectPseudo { .. }
             | Inst::Popcnt { .. }
             | Inst::Rev8 { .. }
             | Inst::Cltz { .. }
@@ -2242,53 +2241,6 @@ impl MachInstEmit for Inst {
                 Inst::gen_move(rd, rs, ty).emit(&[], sink, emit_info, state);
                 sink.bind_label(label_jump_over, &mut state.ctrl_plane);
             }
-            &Inst::FloatSelectPseudo {
-                op,
-                rd,
-                tmp,
-                rs1,
-                rs2,
-                ty,
-            } => {
-                let rs1 = allocs.next(rs1);
-                let rs2 = allocs.next(rs2);
-                let tmp = allocs.next_writable(tmp);
-                let rd = allocs.next_writable(rd);
-                let label_rs2 = sink.get_label();
-                let label_jump_over = sink.get_label();
-                let lt_op = if ty == F32 {
-                    FpuOPRRR::FltS
-                } else {
-                    FpuOPRRR::FltD
-                };
-                Inst::FpuRRR {
-                    alu_op: lt_op,
-                    frm: None,
-                    rd: tmp,
-                    rs1: if op == FloatSelectOP::Max { rs1 } else { rs2 },
-                    rs2: if op == FloatSelectOP::Max { rs2 } else { rs1 },
-                }
-                .emit(&[], sink, emit_info, state);
-                Inst::CondBr {
-                    taken: BranchTarget::Label(label_rs2),
-                    not_taken: BranchTarget::zero(),
-                    kind: IntegerCompare {
-                        kind: IntCC::NotEqual,
-                        rs1: tmp.to_reg(),
-                        rs2: zero_reg(),
-                    },
-                }
-                .emit(&[], sink, emit_info, state);
-                // here select rs1 as result.
-                Inst::gen_move(rd, rs1, ty).emit(&[], sink, emit_info, state);
-                Inst::Jal {
-                    dest: BranchTarget::Label(label_jump_over),
-                }
-                .emit(&[], sink, emit_info, state);
-                sink.bind_label(label_rs2, &mut state.ctrl_plane);
-                Inst::gen_move(rd, rs2, ty).emit(&[], sink, emit_info, state);
-                sink.bind_label(label_jump_over, &mut state.ctrl_plane);
-            }
 
             &Inst::FloatSelect {
                 op,
 
@@ -636,13 +636,6 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             collector.reg_early_def(tmp);
             collector.reg_early_def(rd);
         }
-        &Inst::FloatSelectPseudo {
-            rd, tmp, rs1, rs2, ..
-        } => {
-            collector.reg_uses(&[rs1, rs2]);
-            collector.reg_early_def(tmp);
-            collector.reg_early_def(rd);
-        }
         &Inst::Popcnt {
             sum, step, rs, tmp, ..
         } => {
@@ -1136,29 +1129,6 @@ impl Inst {
                     ty
                 )
             }
-            &Inst::FloatSelectPseudo {
-                op,
-                rd,
-                tmp,
-                rs1,
-                rs2,
-                ty,
-            } => {
-                let rs1 = format_reg(rs1, allocs);
-                let rs2 = format_reg(rs2, allocs);
-                let tmp = format_reg(tmp.to_reg(), allocs);
-                let rd = format_reg(rd.to_reg(), allocs);
-                format!(
-                    "f{}.{}.pseudo {},{},{}##tmp={} ty={}",
-                    op.op_name(),
-                    if ty == F32 { "s" } else { "d" },
-                    rd,
-                    rs1,
-                    rs2,
-                    tmp,
-                    ty
-                )
-            }
             &Inst::FloatSelect {
                 op,
                 rd,
 
@@ -1300,24 +1300,6 @@
         (max VReg (rv_vfmax_vv x y (unmasked) ty)))
     (rv_vmerge_vvm vec_nan max is_not_nan ty)))
 
-;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule 0 (lower (has_type (ty_scalar_float ty) (fmin_pseudo x y)))
-  (gen_float_select_pseudo (FloatSelectOP.Min) x y ty))
-
-(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmin_pseudo x y)))
-  (let ((mask VReg (gen_fcmp_mask ty (FloatCC.LessThan) y x)))
-    (rv_vmerge_vvm x y mask ty)))
-
-;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule 0 (lower (has_type (ty_scalar_float ty) (fmax_pseudo x y)))
-  (gen_float_select_pseudo (FloatSelectOP.Max) x y ty))
-
-(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmax_pseudo x y)))
-  (let ((mask VReg (gen_fcmp_mask ty (FloatCC.LessThan) x y)))
-    (rv_vmerge_vvm x y mask ty)))
-
 ;;;;;  Rules for `stack_addr`;;;;;;;;;
 (rule
   (lower (stack_addr ss offset))
 
@@ -1134,6 +1134,13 @@
 (rule (lower (has_type (vr128_ty ty) (bitselect x y z)))
       (vec_select ty y z x))
 
+;; Special-case some float-selection instructions for min/max
+(rule 3 (lower (has_type (ty_vec128 ty) (bitselect (bitcast _ (fcmp (FloatCC.LessThan) x y)) x y)))
+        (fmin_pseudo_reg ty y x))
+(rule 4 (lower (has_type (ty_vec128 ty) (bitselect (bitcast _ (fcmp (FloatCC.LessThan) y x)) x y)))
+        (fmax_pseudo_reg ty y x))
+
+
 
 ;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -1389,20 +1396,6 @@
       (fmax_reg ty x y))
 
 
-;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Minimum of two registers.
-(rule (lower (has_type ty (fmin_pseudo x y)))
-      (fmin_pseudo_reg ty x y))
-
-
-;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Maximum of two registers.
-(rule (lower (has_type ty (fmax_pseudo x y)))
-      (fmax_pseudo_reg ty x y))
-
-
 ;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Copysign of two registers.
@@ -3719,6 +3712,12 @@
       (select_bool_reg ty (value_nonzero val_cond)
                        (put_in_reg val_true) (put_in_reg val_false)))
 
+;; Special-case some float-selection instructions for min/max
+(rule 1 (lower (has_type (ty_scalar_float ty) (select (maybe_uextend (fcmp (FloatCC.LessThan) x y)) x y)))
+        (fmin_pseudo_reg ty y x))
+(rule 2 (lower (has_type (ty_scalar_float ty) (select (maybe_uextend (fcmp (FloatCC.LessThan) y x)) x y)))
+        (fmax_pseudo_reg ty y x))
+
 
 ;;;; Rules for `select_spectre_guard` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 
@@ -1383,6 +1383,21 @@
 (decl pure vconst_all_ones_or_all_zeros () Constant)
 (extern extractor vconst_all_ones_or_all_zeros vconst_all_ones_or_all_zeros)
 
+;; Specializations for floating-point compares to generate a `minp*` or a
+;; `maxp*` instruction. These are equivalent to the wasm `f32x4.{pmin,pmax}`
+;; instructions and how they're lowered into CLIF. Note the careful ordering
+;; of all the operands here to ensure that the input CLIF matched is implemented
+;; by the corresponding x64 instruction.
+(rule 2 (lower (has_type $F32X4 (bitselect (bitcast _ (fcmp (FloatCC.LessThan) x y)) x y)))
+        (x64_minps x y))
+(rule 2 (lower (has_type $F64X2 (bitselect (bitcast _ (fcmp (FloatCC.LessThan) x y)) x y)))
+        (x64_minpd x y))
+
+(rule 3 (lower (has_type $F32X4 (bitselect (bitcast _ (fcmp (FloatCC.LessThan) y x)) x y)))
+        (x64_maxps x y))
+(rule 3 (lower (has_type $F64X2 (bitselect (bitcast _ (fcmp (FloatCC.LessThan) y x)) x y)))
+        (x64_maxpd x y))
+
 ;;;; Rules for `x86_blendv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I8X16
@@ -2021,6 +2036,18 @@
       (let ((cond_result IcmpCondResult (cmp_zero_i128 (CC.Z) c)))
         (select_icmp cond_result x y)))
 
+;; Specializations for floating-point compares to generate a `mins*` or a
+;; `maxs*` instruction. These are equivalent to the "pseudo-m{in,ax}"
+;; specializations for vectors.
+(rule 2 (lower (has_type $F32 (select (maybe_uextend (fcmp (FloatCC.LessThan) x y)) x y)))
+        (x64_minss x y))
+(rule 2 (lower (has_type $F64 (select (maybe_uextend (fcmp (FloatCC.LessThan) x y)) x y)))
+        (x64_minsd x y))
+(rule 3 (lower (has_type $F32 (select (maybe_uextend (fcmp (FloatCC.LessThan) y x)) x y)))
+        (x64_maxss x y))
+(rule 3 (lower (has_type $F64 (select (maybe_uextend (fcmp (FloatCC.LessThan) y x)) x y)))
+        (x64_maxsd x y))
+
 ;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; If available, we can use a plain lzcnt instruction here. Note no
@@ -2677,28 +2704,6 @@
             (final Xmm (x64_andnpd nan_fraction_mask max_blended_nan_positive)))
         final))
 
-;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type $F32 (fmin_pseudo x y)))
-      (x64_minss y x))
-(rule (lower (has_type $F64 (fmin_pseudo x y)))
-      (x64_minsd y x))
-(rule (lower (has_type $F32X4 (fmin_pseudo x y)))
-      (x64_minps y x))
-(rule (lower (has_type $F64X2 (fmin_pseudo x y)))
-      (x64_minpd y x))
-
-;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(rule (lower (has_type $F32 (fmax_pseudo x y)))
-      (x64_maxss y x))
-(rule (lower (has_type $F64 (fmax_pseudo x y)))
-      (x64_maxsd y x))
-(rule (lower (has_type $F32X4 (fmax_pseudo x y)))
-      (x64_maxps y x))
-(rule (lower (has_type $F64X2 (fmax_pseudo x y)))
-      (x64_maxpd y x))
-
 ;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Base case for fma is to call out to one of two libcalls. For vectors they
 
@@ -43,4 +43,3 @@
 (rule (simplify (bitselect ty @ (multi_lane _ _) (sge _ x y) y x)) (smin ty x y))
 (rule (simplify (bitselect ty @ (multi_lane _ _) (ugt _ x y) y x)) (umin ty x y))
 (rule (simplify (bitselect ty @ (multi_lane _ _) (uge _ x y) y x)) (umin ty x y))
-