From: Sanjay Patel Date: Wed, 28 Sep 2022 16:13:34 +0000 (-0400) Subject: [InstSimplify][PhaseOrdering] add tests for vector select of min/max; NFC X-Git-Tag: upstream/17.0.6~32100 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8bfba17b409000deae6436f92080d0c96bf5ea3b;p=platform%2Fupstream%2Fllvm.git [InstSimplify][PhaseOrdering] add tests for vector select of min/max; NFC The phase ordering test is the almost unoptimized IR for the example in issue #42100; it was passed through -mem2reg to reduce obvious excessive load/store and other noise. D134879 --- diff --git a/llvm/test/Transforms/InstSimplify/select-maxmin.ll b/llvm/test/Transforms/InstSimplify/select-maxmin.ll index 957aee7..da91f9f 100644 --- a/llvm/test/Transforms/InstSimplify/select-maxmin.ll +++ b/llvm/test/Transforms/InstSimplify/select-maxmin.ll @@ -5,6 +5,10 @@ declare i8 @llvm.smin.i8(i8, i8) declare i8 @llvm.smax.i8(i8, i8) declare i8 @llvm.umin.i8(i8, i8) declare i8 @llvm.umax.i8(i8, i8) +declare <4 x i8> @llvm.smin.v4i8(<4 x i8>, <4 x i8>) +declare <4 x i8> @llvm.smax.v4i8(<4 x i8>, <4 x i8>) +declare <4 x i8> @llvm.umin.v4i8(<4 x i8>, <4 x i8>) +declare <4 x i8> @llvm.umax.v4i8(<4 x i8>, <4 x i8>) ; smin @@ -1937,3 +1941,191 @@ define i8 @eq_yx_umax_tval_wrong_op(i8 %x, i8 %y, i8 %z) { %r = select i1 %i, i8 %m, i8 %x ret i8 %r } + +; TODO: select with smin pred + +define <4 x i8> @slt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @slt_xy_smin_select_y_shuf_fval( +; CHECK-NEXT: [[I:%.*]] = icmp slt <4 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp slt <4 x i8> %x, %y + %m = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %x, <4 x i8> %y) + %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x 
i32> + %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s + ret <4 x i8> %r +} + +define <4 x i8> @sgt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @sgt_xy_smin_select_y_shuf_fval( +; CHECK-NEXT: [[I:%.*]] = icmp sgt <4 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp sgt <4 x i8> %x, %y + %m = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %x, <4 x i8> %y) + %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> + %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s + ret <4 x i8> %r +} + +define <4 x i8> @slt_xy_smin_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @slt_xy_smin_select_x_shuf_fval( +; CHECK-NEXT: [[I:%.*]] = icmp slt <4 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[M]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp slt <4 x i8> %x, %y + %m = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %x, <4 x i8> %y) + %s = shufflevector <4 x i8> %x, <4 x i8> %m, <4 x i32> + %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s + ret <4 x i8> %r +} + +; TODO: select with non-strict smax pred + +define <4 x i8> @sge_xy_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @sge_xy_smax_select_y_shuf_fval( +; CHECK-NEXT: [[I:%.*]] = icmp sge <4 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] +; 
CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp sge <4 x i8> %x, %y + %m = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %y, <4 x i8> %x) + %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> + %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s + ret <4 x i8> %r +} + +define <4 x i8> @sle_yx_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @sle_yx_smax_select_y_shuf_fval( +; CHECK-NEXT: [[I:%.*]] = icmp sge <4 x i8> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp sge <4 x i8> %y, %x + %m = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %y, <4 x i8> %x) + %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> + %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s + ret <4 x i8> %r +} + +define <4 x i8> @sge_xy_smax_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @sge_xy_smax_select_x_shuf_fval( +; CHECK-NEXT: [[I:%.*]] = icmp sge <4 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[M]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp sge <4 x i8> %x, %y + %m = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %y, <4 x i8> %x) + %s = shufflevector <4 x i8> %x, <4 x i8> %m, <4 x i32> + %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s + ret <4 x i8> %r +} + +; TODO: select with non-strict inverted umin pred + +define <4 x i8> @uge_xy_umin_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @uge_xy_umin_select_y_shuf_tval( +; CHECK-NEXT: [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X]], <4 
x i8> [[Y]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[M]], <4 x i8> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[S]], <4 x i8> [[X]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp uge <4 x i8> %x, %y + %m = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %x, <4 x i8> %y) + %s = shufflevector <4 x i8> %m, <4 x i8> %y, <4 x i32> + %r = select <4 x i1> %i, <4 x i8> %s, <4 x i8> %x + ret <4 x i8> %r +} + +define <4 x i8> @uge_xy_umin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @uge_xy_umin_select_y_shuf_fval( +; CHECK-NEXT: [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[M]], <4 x i8> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp uge <4 x i8> %x, %y + %m = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %x, <4 x i8> %y) + %s = shufflevector <4 x i8> %m, <4 x i8> %y, <4 x i32> + %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s + ret <4 x i8> %r +} + +define <4 x i8> @uge_xy_umin_select_x_shuf_tval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @uge_xy_umin_select_x_shuf_tval( +; CHECK-NEXT: [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[M]], <4 x i8> [[X]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[S]], <4 x i8> [[X]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp uge <4 x i8> %x, %y + %m = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %x, <4 x i8> %y) + %s = shufflevector <4 x i8> %m, <4 x i8> %x, <4 x i32> + %r = select <4 x i1> %i, <4 x i8> %s, <4 x i8> %x + ret <4 x i8> %r +} + +; TODO: select with swapped umax pred + +define <4 x i8> @ult_yx_umax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: 
@ult_yx_umax_select_y_shuf_fval( +; CHECK-NEXT: [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp ult <4 x i8> %y, %x + %m = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %y, <4 x i8> %x) + %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> + %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s + ret <4 x i8> %r +} + +define <4 x i8> @ult_yx_umax_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @ult_yx_umax_select_y_shuf_tval( +; CHECK-NEXT: [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[S]], <4 x i8> [[X]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp ult <4 x i8> %y, %x + %m = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %y, <4 x i8> %x) + %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> + %r = select <4 x i1> %i, <4 x i8> %s, <4 x i8> %x + ret <4 x i8> %r +} + +define <4 x i8> @ult_yx_umax_select_y_shuf_mask_fval(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @ult_yx_umax_select_y_shuf_mask_fval( +; CHECK-NEXT: [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]]) +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %i = icmp ult <4 x i8> %y, %x + %m = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %y, <4 x i8> %x) + %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> + %r = select <4 x i1> %i, <4 
x i8> %x, <4 x i8> %s + ret <4 x i8> %r +} diff --git a/llvm/test/Transforms/PhaseOrdering/vector-select.ll b/llvm/test/Transforms/PhaseOrdering/vector-select.ll index 84876b2..3817be8 100644 --- a/llvm/test/Transforms/PhaseOrdering/vector-select.ll +++ b/llvm/test/Transforms/PhaseOrdering/vector-select.ll @@ -90,5 +90,54 @@ define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) ret <4 x i8> %sel } +define <4 x i32> @PR42100(<4 x i32> noundef %x, <4 x i32> noundef %min) { +; CHECK-LABEL: @PR42100( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[MIN:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X]], <4 x i32> [[MIN]]) +; CHECK-NEXT: [[MIN_ADDR_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[MIN]], <4 x i32> <i32 0, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[SEL3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1]] +; CHECK-NEXT: [[MIN_ADDR_1_1:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1]], <4 x i32> [[SEL3]], <4 x i32> <i32 0, i32 5, i32 2, i32 3> +; CHECK-NEXT: [[SEL4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1_1]] +; CHECK-NEXT: [[MIN_ADDR_1_2:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1_1]], <4 x i32> [[SEL4]], <4 x i32> <i32 0, i32 1, i32 6, i32 3> +; CHECK-NEXT: [[SEL5:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1_2]] +; CHECK-NEXT: [[MIN_ADDR_1_3:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1_2]], <4 x i32> [[SEL5]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> +; CHECK-NEXT: ret <4 x i32> [[MIN_ADDR_1_3]] +; +entry: + br label %for.cond + +for.cond: + %min.addr.0 = phi <4 x i32> [ %min, %entry ], [ %min.addr.1, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp ne i32 %i.0, 4 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + br label %for.end + +for.body: + %vecext = extractelement <4 x i32> %x, i32 %i.0 + %vecext1 = extractelement <4 x i32> %min.addr.0, i32 %i.0 + %cmp2 = icmp slt i32 %vecext, %vecext1 + br i1 %cmp2, label %if.then, label 
%if.end + +if.then: + %vecext3 = extractelement <4 x i32> %x, i32 %i.0 + %vecins = insertelement <4 x i32> %min.addr.0, i32 %vecext3, i32 %i.0 + br label %if.end + +if.end: + %min.addr.1 = phi <4 x i32> [ %vecins, %if.then ], [ %min.addr.0, %for.body ] + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret <4 x i32> %min.addr.0 +} + declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1