Skip to content

Commit af35da6

Browse files
egraphs: Add some select optimizations (#6843)
* egraphs: Remove extends before selects when possible * egraphs: Rewrite `select+{i,f}cmp` when select arms match cmp result * egraphs: Simplify double `bmask` * egraphs: Simplify double `bswap` * egraphs: Modularize select optimizations Co-Authored-By: Jamey Sharp <jsharp@fastly.com> * egraphs: Remove `bmask+{i,f}cmp` optimizations These were unsound since we only represent the results of `{i,f}cmp` as `-1` for vectors, and not for scalars; scalars get the output of `1` * egraphs: Optimize `bitrev` in `select` and `bmask` * egraphs: Optimize `popcnt` in `select` and `bmask` * egraphs: Optimize double `bitrev` * egraphs: Add `truthy` expression matcher Cleans up the previous logic a bit. This also adds `rotl`/`rotr` as truth-preserving expressions, as well as `select` when its branches match certain constants. Co-Authored-By: Jamey Sharp <jsharp@fastly.com> * egraphs: Make `icmp ne x, 0` a truthy expression * egraphs: Add `icmp ne x, 0` as an expression that can delete truthy inputs * egraphs: Add tests for commutative versions of the `icmp ne` truthy rules * egraphs: Restrict `icmp ne` optimization to arms with the same type. This also cleans up some merge conflicts that happened when rebasing. * egraphs: Restore some accidentally deleted tests * egraphs: Use `t` and `f` as names for select arms Co-authored-by: Jamey Sharp <jamey@minilop.net> * egraphs: Allow `icmp ne` opt to match arms of different types Co-authored-by: Jamey Sharp <jamey@minilop.net> --------- Co-authored-by: Jamey Sharp <jsharp@fastly.com> Co-authored-by: Jamey Sharp <jamey@minilop.net>
1 parent 858c1d8 commit af35da6

File tree

2 files changed

+235
-0
lines changed

2 files changed

+235
-0
lines changed

cranelift/codegen/src/opts/bitops.isle

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,44 @@
104104
(rule (simplify (sshr ty (bor ty (ineg ty x) x) (iconst ty (u64_from_imm64 shift_amt))))
105105
(if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1)))
106106
(bmask ty x))
107+
108+
;; Matches any expressions that preserve "truthiness".
109+
;; i.e. If the input is zero it remains zero, and if it is nonzero it can have
110+
;; a different value as long as it is still nonzero.
111+
(decl pure multi truthy (Value) Value)
112+
(rule (truthy (sextend _ x)) x)
113+
(rule (truthy (uextend _ x)) x)
114+
(rule (truthy (bmask _ x)) x)
115+
(rule (truthy (ineg _ x)) x)
116+
(rule (truthy (bswap _ x)) x)
117+
(rule (truthy (bitrev _ x)) x)
118+
(rule (truthy (popcnt _ x)) x)
119+
(rule (truthy (rotl _ x _)) x)
120+
(rule (truthy (rotr _ x _)) x)
121+
(rule (truthy (select _ x (iconst _ (u64_from_imm64 (u64_nonzero _))) (iconst _ (u64_from_imm64 0)))) x)
122+
;; (ne ty (iconst 0) v) is also canonicalized into this form via another rule
123+
(rule (truthy (ne _ x (iconst _ (u64_from_imm64 0)))) x)
124+
125+
;; All of these expressions don't care about their input as long as it is truthy,
126+
;; so we can remove expressions that preserve that property from the input.
127+
(rule (simplify (bmask ty v)) (if-let x (truthy v)) (bmask ty x))
128+
(rule (simplify (select ty v t f)) (if-let c (truthy v)) (select ty c t f))
129+
;; (ne ty (iconst 0) v) is also canonicalized into this form via another rule
130+
(rule (simplify (ne cty v (iconst _ (u64_from_imm64 0))))
131+
(if-let c (truthy v))
132+
(if-let (value_type ty) c)
133+
(ne cty c (iconst ty (imm64 0))))
134+
135+
136+
137+
;; (sextend (bmask x)) can be replaced with (bmask x) since bmask
138+
;; supports any size of output type, regardless of input.
139+
;; Same with `ireduce`
140+
(rule (simplify (sextend ty (bmask _ x))) (bmask ty x))
141+
(rule (simplify (ireduce ty (bmask _ x))) (bmask ty x))
142+
143+
;; (bswap (bswap x)) == x
144+
(rule (simplify (bswap ty (bswap ty x))) (subsume x))
145+
146+
;; (bitrev (bitrev x)) == x
147+
(rule (simplify (bitrev ty (bitrev ty x))) (subsume x))

cranelift/filetests/filetests/egraph/bitops.clif

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,3 +178,197 @@ block0(v0: i64):
178178

179179
; check: v5 = bmask.i64 v0
180180
; check: return v5
181+
182+
183+
function %double_bmask(i16) -> i64 {
184+
block0(v0: i16):
185+
v1 = bmask.i32 v0
186+
v2 = bmask.i64 v1
187+
return v2
188+
}
189+
190+
; check: v3 = bmask.i64 v0
191+
; check: return v3
192+
193+
function %bmask_sextend(i16) -> i64 {
194+
block0(v0: i16):
195+
v1 = sextend.i32 v0
196+
v2 = bmask.i64 v1
197+
return v2
198+
}
199+
200+
; check: v3 = bmask.i64 v0
201+
; check: return v3
202+
203+
function %bmask_uextend(i16) -> i64 {
204+
block0(v0: i16):
205+
v1 = uextend.i32 v0
206+
v2 = bmask.i64 v1
207+
return v2
208+
}
209+
210+
; check: v3 = bmask.i64 v0
211+
; check: return v3
212+
213+
function %bmask_ineg(i16) -> i64 {
214+
block0(v0: i16):
215+
v1 = ineg.i16 v0
216+
v2 = bmask.i64 v1
217+
return v2
218+
}
219+
220+
; check: v3 = bmask.i64 v0
221+
; check: return v3
222+
223+
function %bmask_bswap(i16) -> i64 {
224+
block0(v0: i16):
225+
v1 = bswap.i16 v0
226+
v2 = bmask.i64 v1
227+
return v2
228+
}
229+
230+
; check: v3 = bmask.i64 v0
231+
; check: return v3
232+
233+
function %bmask_bitrev(i16) -> i64 {
234+
block0(v0: i16):
235+
v1 = bitrev.i16 v0
236+
v2 = bmask.i64 v1
237+
return v2
238+
}
239+
240+
; check: v3 = bmask.i64 v0
241+
; check: return v3
242+
243+
function %bmask_popcnt(i16) -> i64 {
244+
block0(v0: i16):
245+
v1 = popcnt.i16 v0
246+
v2 = bmask.i64 v1
247+
return v2
248+
}
249+
250+
; check: v3 = bmask.i64 v0
251+
; check: return v3
252+
253+
function %bmask_rotl(i16, i16) -> i64 {
254+
block0(v0: i16, v1: i16):
255+
v2 = rotl.i16 v0, v1
256+
v3 = bmask.i64 v2
257+
return v3
258+
}
259+
260+
; check: v4 = bmask.i64 v0
261+
; check: return v4
262+
263+
function %bmask_rotr(i16, i16) -> i64 {
264+
block0(v0: i16, v1: i16):
265+
v2 = rotr.i16 v0, v1
266+
v3 = bmask.i64 v2
267+
return v3
268+
}
269+
270+
; check: v4 = bmask.i64 v0
271+
; check: return v4
272+
273+
function %bmask_select_non_zero(i16) -> i64 {
274+
block0(v0: i16):
275+
v1 = iconst.i16 123
276+
v2 = iconst.i16 0
277+
v3 = select v0, v1, v2
278+
v4 = bmask.i64 v3
279+
return v4
280+
}
281+
282+
; check: v5 = bmask.i64 v0
283+
; check: return v5
284+
285+
function %bmask_icmp_ne_zero(i16) -> i64 {
286+
block0(v0: i16):
287+
v1 = iconst.i16 0
288+
v2 = icmp ne v0, v1
289+
v3 = bmask.i64 v2
290+
return v3
291+
}
292+
293+
; check: v4 = bmask.i64 v0
294+
; check: return v4
295+
296+
function %icmp_ne_zero_deletes_thruthy_input(i16) -> i8 {
297+
block0(v0: i16):
298+
v1 = popcnt v0
299+
v2 = iconst.i16 0
300+
v3 = icmp ne v1, v2
301+
return v3
302+
}
303+
304+
; check: v2 = iconst.i16 0
305+
; check: v4 = icmp ne v0, v2
306+
; check: return v4
307+
308+
309+
function %icmp_ne_zero_deletes_thruthy_input_const_lhs(i16) -> i8 {
310+
block0(v0: i16):
311+
v1 = iconst.i16 0
312+
v2 = popcnt v0
313+
v3 = icmp ne v1, v2
314+
return v3
315+
}
316+
317+
; check: v1 = iconst.i16 0
318+
; check: v5 = icmp ne v0, v1
319+
; check: return v5
320+
321+
322+
function %icmp_ne_matches_arms_of_different_types(i16) -> i8 {
323+
block0(v0: i16):
324+
v1 = iconst.i64 0
325+
v2 = uextend.i64 v0
326+
v3 = icmp ne v1, v2
327+
return v3
328+
}
329+
330+
; check: v5 = iconst.i16 0
331+
; check: v6 = icmp ne v0, v5
332+
; check: return v6
333+
334+
335+
336+
function %sextend_bmask(i16) -> i64 {
337+
block0(v0: i16):
338+
v1 = bmask.i32 v0
339+
v2 = sextend.i64 v1
340+
return v2
341+
}
342+
343+
; check: v3 = bmask.i64 v0
344+
; check: return v3
345+
346+
function %ireduce_bmask(i16) -> i8 {
347+
block0(v0: i16):
348+
v1 = bmask.i32 v0
349+
v2 = ireduce.i8 v1
350+
return v2
351+
}
352+
353+
; check: v3 = bmask.i8 v0
354+
; check: return v3
355+
356+
357+
function %double_bswap(i64) -> i64 {
358+
block0(v0: i64):
359+
v1 = bswap.i64 v0
360+
v2 = bswap.i64 v1
361+
return v2
362+
}
363+
364+
; check: return v0
365+
366+
367+
function %double_bitrev(i64) -> i64 {
368+
block0(v0: i64):
369+
v1 = bitrev.i64 v0
370+
v2 = bitrev.i64 v1
371+
return v2
372+
}
373+
374+
; check: return v0

0 commit comments

Comments
 (0)