From a1b8c10bf6819e83183c3a4dab55a2261978326a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 13 Nov 2016 00:32:39 +0000 Subject: [PATCH] [x86] add smin/smax with zero tests These are vector tests corresponding to the discussion at: http://lists.llvm.org/pipermail/llvm-dev/2016-November/106868.html Apart from the lack of min/max matching, the and/andn difference shows a lack of DAG-level canonicalization. llvm-svn: 286737 --- llvm/test/CodeGen/X86/vec_minmax_match.ll | 64 +++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/llvm/test/CodeGen/X86/vec_minmax_match.ll b/llvm/test/CodeGen/X86/vec_minmax_match.ll index d6860c0..6293a61 100644 --- a/llvm/test/CodeGen/X86/vec_minmax_match.ll +++ b/llvm/test/CodeGen/X86/vec_minmax_match.ll @@ -32,6 +32,38 @@ define <4 x i32> @smin_vec2(<4 x i32> %x) { ret <4 x i32> %sel } +; Z = X -nsw Y +; (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0) +define <4 x i32> @smin_vec3(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: smin_vec3: +; CHECK: # BB#0: +; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 +; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpandn %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: retq +; + %sub = sub nsw <4 x i32> %x, %y + %cmp = icmp sgt <4 x i32> %x, %y + %sel = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> %sub + ret <4 x i32> %sel +} + +; Z = X -nsw Y +; (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0) +define <4 x i32> @smin_vec4(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: smin_vec4: +; CHECK: # BB#0: +; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 +; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: retq +; + %sub = sub nsw <4 x i32> %x, %y + %cmp = icmp slt <4 x i32> %x, %y + %sel = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> zeroinitializer + ret <4 x i32> %sel +} + define <4 x i32> @smax_vec1(<4 x i32> %x) { ; CHECK-LABEL: smax_vec1: ; CHECK: # BB#0: @@ -60,6 +92,38 @@ define <4 x i32> @smax_vec2(<4 x i32> %x) { ret <4 x i32> %sel } +; Z = X -nsw Y 
+; (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0) +define <4 x i32> @smax_vec3(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: smax_vec3: +; CHECK: # BB#0: +; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 +; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpandn %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: retq +; + %sub = sub nsw <4 x i32> %x, %y + %cmp = icmp slt <4 x i32> %x, %y + %sel = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> %sub + ret <4 x i32> %sel +} + +; Z = X -nsw Y +; (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0) +define <4 x i32> @smax_vec4(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: smax_vec4: +; CHECK: # BB#0: +; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 +; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: retq +; + %sub = sub nsw <4 x i32> %x, %y + %cmp = icmp sgt <4 x i32> %x, %y + %sel = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> zeroinitializer + ret <4 x i32> %sel +} + define <4 x i32> @umax_vec1(<4 x i32> %x) { ; CHECK-LABEL: umax_vec1: ; CHECK: # BB#0: -- 2.7.4