Skip to content

Commit 25ea16e

Browse files
committed
cranelift: Optimize some vselect+fcmp into f{min,max}_pseudo
1 parent 3fa4baa commit 25ea16e

File tree

3 files changed

+73
-1
lines changed

3 files changed

+73
-1
lines changed

cranelift/codegen/src/opts/algebraic.isle

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,6 @@
272272
(umin ty x y))
273273

274274

275-
276275
;; Transform vselect-of-icmp into {u,s}{min,max} instructions where possible.
277276
(rule (simplify
278277
(vselect ty (icmp _ (IntCC.SignedGreaterThan) x y) x y))
@@ -298,3 +297,27 @@
298297
;; A vselect that picks `x` in the lanes where `x <= y` (unsigned) and `y` otherwise is rewritten to `umin x y`.
(rule (simplify
299298
(vselect ty (icmp _ (IntCC.UnsignedLessThanOrEqual) x y) x y))
300299
(umin ty x y))
300+
301+
302+
303+
;; For floats convert fcmp lt into pseudo_min and gt into pseudo_max
304+
;;
305+
;; The fmin_pseudo / fmax_pseudo docs state:
306+
;; The behaviour of these operations is defined as fmin_pseudo(a, b) = (b < a) ? b : a and fmax_pseudo(a, b) = (a < b) ? b : a,
307+
;; and the behaviour for zero or NaN inputs follows from the behaviour of < with such inputs.
308+
;;
309+
;; `select (x < y) x y` is `(x < y) ? x : y` = fmin_pseudo(y, x), and `select (x > y) x y` is `(y < x) ? x : y` = fmax_pseudo(y, x).
;; The operands MUST be swapped: when either input is NaN (or for +0.0 vs -0.0) the compare is false and the select yields `y`,
;; which is the *first* operand of the pseudo instruction, not the second.
310+
(rule (simplify
311+
(select ty (fcmp _ (FloatCC.LessThan) x y) x y))
312+
(fmin_pseudo ty y x))
313+
(rule (simplify
314+
(select ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
315+
(fmax_pseudo ty y x))
316+
317+
;; Do the same for vectors.
;; Lane-wise, `vselect (x < y) x y` is `(x < y) ? x : y`, which is fmin_pseudo(y, x) because fmin_pseudo(a, b) = (b < a) ? b : a;
;; `vselect (x > y) x y` is `(y < x) ? x : y`, which is fmax_pseudo(y, x) because fmax_pseudo(a, b) = (a < b) ? b : a.
;; The swapped operand order is required so NaN and signed-zero lanes still yield `y`.
318+
(rule (simplify
319+
(vselect ty (fcmp _ (FloatCC.LessThan) x y) x y))
320+
(fmin_pseudo ty y x))
321+
(rule (simplify
322+
(vselect ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
323+
(fmax_pseudo ty y x))

cranelift/filetests/filetests/egraph/select.clif

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,28 @@ block0(v0: i32, v1: i32, v2: i32, v3: i32):
115115
; check: v11 = icmp ule v0, v1
116116
; check: v5 = select v11, v2, v3
117117
; check: return v5
118+
119+
120+
121+
122+
;; select(v0 > v1, v0, v1) is (v1 < v0) ? v0 : v1. Since fmax_pseudo(a, b) = (a < b) ? b : a,
;; the equivalent instruction is fmax_pseudo(v1, v0) -- note the swapped operands, which keep
;; the NaN / signed-zero result (v1) identical to the original select.
function %select_fcmp_gt_to_fmax_pseudo(f32, f32) -> f32 {
123+
block0(v0: f32, v1: f32):
124+
v2 = fcmp gt v0, v1
125+
v3 = select v2, v0, v1
126+
return v3
127+
}
128+
129+
; check: block0(v0: f32, v1: f32):
130+
; check: v4 = fmax_pseudo v1, v0
131+
; check: return v4
132+
133+
;; select(v0 < v1, v0, v1) is (v0 < v1) ? v0 : v1. Since fmin_pseudo(a, b) = (b < a) ? b : a,
;; the equivalent instruction is fmin_pseudo(v1, v0) -- note the swapped operands, which keep
;; the NaN / signed-zero result (v1) identical to the original select.
function %select_fcmp_lt_to_fmin_pseudo(f32, f32) -> f32 {
134+
block0(v0: f32, v1: f32):
135+
v2 = fcmp lt v0, v1
136+
v3 = select v2, v0, v1
137+
return v3
138+
}
139+
140+
; check: block0(v0: f32, v1: f32):
141+
; check: v4 = fmin_pseudo v1, v0
142+
; check: return v4

cranelift/filetests/filetests/egraph/vselect.clif

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,27 @@ block0(v0: i32x4, v1: i32x4, v2: i32x4, v3: i32x4):
114114
; check: v11 = icmp ule v0, v1
115115
; check: v5 = vselect v11, v2, v3
116116
; check: return v5
117+
118+
119+
120+
;; Lane-wise, vselect(v0 > v1, v0, v1) is (v1 < v0) ? v0 : v1. Since fmax_pseudo(a, b) = (a < b) ? b : a,
;; the equivalent instruction is fmax_pseudo(v1, v0) -- note the swapped operands, which keep
;; the NaN / signed-zero lanes (v1) identical to the original vselect.
function %vselect_fcmp_gt_to_fmax_pseudo(f32x4, f32x4) -> f32x4 {
121+
block0(v0: f32x4, v1: f32x4):
122+
v2 = fcmp gt v0, v1
123+
v3 = vselect v2, v0, v1
124+
return v3
125+
}
126+
127+
; check: block0(v0: f32x4, v1: f32x4):
128+
; check: v4 = fmax_pseudo v1, v0
129+
; check: return v4
130+
131+
;; Lane-wise, vselect(v0 < v1, v0, v1) is (v0 < v1) ? v0 : v1. Since fmin_pseudo(a, b) = (b < a) ? b : a,
;; the equivalent instruction is fmin_pseudo(v1, v0) -- note the swapped operands, which keep
;; the NaN / signed-zero lanes (v1) identical to the original vselect.
function %vselect_fcmp_lt_to_fmin_pseudo(f32x4, f32x4) -> f32x4 {
132+
block0(v0: f32x4, v1: f32x4):
133+
v2 = fcmp lt v0, v1
134+
v3 = vselect v2, v0, v1
135+
return v3
136+
}
137+
138+
; check: block0(v0: f32x4, v1: f32x4):
139+
; check: v4 = fmin_pseudo v1, v0
140+
; check: return v4

0 commit comments

Comments
 (0)