From 0c56bc92e4b9cf949f431d1c9e11e9b509ef6dbd Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 24 Jan 2022 13:19:20 -0600 Subject: [PATCH] [PowerPC] Fix eq/ne comparison of v2i64 pre-Power8 In commit 1674d9b6b2da, I fixed the bug where we didn't consider both words of the result of the comparison. However, the logic needs to be different for eq and ne. Namely, for eq, we need both words of the doubleword to be equal, so it is an AND. On the other hand, for ne, we need either word to be unequal, so it is an OR. --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 +- llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll | 114 ++++++++++++++++++++- .../CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll | 2 +- llvm/test/CodeGen/PowerPC/vsx.ll | 6 +- 4 files changed, 117 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 105a435..25cc34b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3508,8 +3508,9 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { int ShuffV[] = {1, 0, 3, 2}; SDValue Shuff = DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV); - return DAG.getBitcast( - MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32)); + return DAG.getBitcast(MVT::v2i64, + DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR, + dl, MVT::v4i32, Shuff, SetCC32)); } // We handle most of these in the usual way. 
diff --git a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll index b63b142..e9d792c 100644 --- a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll +++ b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll @@ -5,8 +5,8 @@ ; RUN: FileCheck %s --check-prefix=CHECK_LE ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-- < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK_P8LE -define i1 @foo(<2 x i64> %a) #0 { -; CHECK-LABEL: foo: +define i1 @shufeq(<2 x i64> %a) #0 { +; CHECK-LABEL: shufeq: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxswapd 35, 34 ; CHECK-NEXT: lwz 3, L..C0(2) # %const.0 @@ -19,7 +19,7 @@ define i1 @foo(<2 x i64> %a) #0 { ; CHECK-NEXT: lwz 3, -12(1) ; CHECK-NEXT: blr ; -; CHECK_LE-LABEL: foo: +; CHECK_LE-LABEL: shufeq: ; CHECK_LE: # %bb.0: # %entry ; CHECK_LE-NEXT: xxswapd 35, 34 ; CHECK_LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha @@ -33,7 +33,7 @@ define i1 @foo(<2 x i64> %a) #0 { ; CHECK_LE-NEXT: ld 3, -16(1) ; CHECK_LE-NEXT: blr ; -; CHECK_P8LE-LABEL: foo: +; CHECK_P8LE-LABEL: shufeq: ; CHECK_P8LE: # %bb.0: # %entry ; CHECK_P8LE-NEXT: xxswapd 35, 34 ; CHECK_P8LE-NEXT: vcmpequd 2, 2, 3 @@ -46,3 +46,109 @@ entry: %2 = extractelement <2 x i1> %1, i32 0 ret i1 %2 } + +define i1 @shufne(<2 x i64> %a) #0 { +; CHECK-LABEL: shufne: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 35, 34 +; CHECK-NEXT: lwz 3, L..C1(2) # %const.0 +; CHECK-NEXT: vcmpequw 2, 2, 3 +; CHECK-NEXT: lxvw4x 35, 0, 3 +; CHECK-NEXT: addi 3, 1, -16 +; CHECK-NEXT: vperm 3, 2, 2, 3 +; CHECK-NEXT: xxland 0, 35, 34 +; CHECK-NEXT: stxvw4x 0, 0, 3 +; CHECK-NEXT: lwz 3, -12(1) +; CHECK-NEXT: blr +; +; CHECK_LE-LABEL: shufne: +; CHECK_LE: # %bb.0: # %entry +; CHECK_LE-NEXT: xxswapd 35, 34 +; CHECK_LE-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; CHECK_LE-NEXT: addi 3, 3, .LCPI1_0@toc@l +; CHECK_LE-NEXT: vcmpequw 2, 2, 3 +; CHECK_LE-NEXT: lvx 3, 0, 3 +; CHECK_LE-NEXT: addi 3, 1, -16 +; CHECK_LE-NEXT: vperm 3, 2, 2, 3 +; CHECK_LE-NEXT: xxland 34, 35, 
34 +; CHECK_LE-NEXT: stvx 2, 0, 3 +; CHECK_LE-NEXT: ld 3, -16(1) +; CHECK_LE-NEXT: blr +; +; CHECK_P8LE-LABEL: shufne: +; CHECK_P8LE: # %bb.0: # %entry +; CHECK_P8LE-NEXT: xxswapd 35, 34 +; CHECK_P8LE-NEXT: vcmpequd 2, 2, 3 +; CHECK_P8LE-NEXT: xxswapd 0, 34 +; CHECK_P8LE-NEXT: mffprd 3, 0 +; CHECK_P8LE-NEXT: blr +entry: + %0 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0> + %1 = icmp eq <2 x i64> %a, %0 + %2 = extractelement <2 x i1> %1, i32 0 + ret i1 %2 +} + +define <2 x i64> @cmpeq(<2 x i64> noundef %a, <2 x i64> noundef %b) { +; CHECK-LABEL: cmpeq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lwz 3, L..C2(2) # %const.0 +; CHECK-NEXT: vcmpequw 2, 2, 3 +; CHECK-NEXT: lxvw4x 35, 0, 3 +; CHECK-NEXT: vperm 3, 2, 2, 3 +; CHECK-NEXT: xxland 34, 35, 34 +; CHECK-NEXT: blr +; +; CHECK_LE-LABEL: cmpeq: +; CHECK_LE: # %bb.0: # %entry +; CHECK_LE-NEXT: vcmpequw 2, 2, 3 +; CHECK_LE-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; CHECK_LE-NEXT: addi 3, 3, .LCPI2_0@toc@l +; CHECK_LE-NEXT: lvx 3, 0, 3 +; CHECK_LE-NEXT: vperm 3, 2, 2, 3 +; CHECK_LE-NEXT: xxland 34, 35, 34 +; CHECK_LE-NEXT: blr +; +; CHECK_P8LE-LABEL: cmpeq: +; CHECK_P8LE: # %bb.0: # %entry +; CHECK_P8LE-NEXT: vcmpequd 2, 2, 3 +; CHECK_P8LE-NEXT: blr +entry: + %cmp = icmp eq <2 x i64> %a, %b + %sext = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %sext +} + +define <2 x i64> @cmpne(<2 x i64> noundef %a, <2 x i64> noundef %b) { +; CHECK-LABEL: cmpne: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcmpequw 2, 2, 3 +; CHECK-NEXT: lwz 3, L..C3(2) # %const.0 +; CHECK-NEXT: lxvw4x 35, 0, 3 +; CHECK-NEXT: xxlnor 34, 34, 34 +; CHECK-NEXT: vperm 3, 2, 2, 3 +; CHECK-NEXT: xxlor 34, 35, 34 +; CHECK-NEXT: blr +; +; CHECK_LE-LABEL: cmpne: +; CHECK_LE: # %bb.0: # %entry +; CHECK_LE-NEXT: vcmpequw 2, 2, 3 +; CHECK_LE-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; CHECK_LE-NEXT: addi 3, 3, .LCPI3_0@toc@l +; CHECK_LE-NEXT: lvx 3, 0, 3 +; CHECK_LE-NEXT: xxlnor 34, 34, 34 +; CHECK_LE-NEXT: vperm 3, 2, 2, 3 +; CHECK_LE-NEXT: xxlor 34, 35, 34 +; 
CHECK_LE-NEXT: blr +; +; CHECK_P8LE-LABEL: cmpne: +; CHECK_P8LE: # %bb.0: # %entry +; CHECK_P8LE-NEXT: vcmpequd 2, 2, 3 +; CHECK_P8LE-NEXT: xxlnor 34, 34, 34 +; CHECK_P8LE-NEXT: blr +entry: + %cmp = icmp ne <2 x i64> %a, %b + %sext = sext <2 x i1> %cmp to <2 x i64> + ret <2 x i64> %sext +} + diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll index eee3ff4..c55eae8 100644 --- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll @@ -11986,7 +11986,7 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) { ; PWR7-NEXT: lxvw4x 35, 0, 3 ; PWR7-NEXT: xxlnor 34, 34, 34 ; PWR7-NEXT: vperm 3, 2, 2, 3 -; PWR7-NEXT: xxland 34, 35, 34 +; PWR7-NEXT: xxlor 34, 35, 34 ; PWR7-NEXT: blr ; ; PWR8-LABEL: ugt_1_v2i64: diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index d882050..10dbd4d 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -2079,7 +2079,7 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: lxvw4x v3, 0, r3 ; CHECK-NEXT: xxlnor v2, v2, v2 ; CHECK-NEXT: vperm v3, v2, v2, v3 -; CHECK-NEXT: xxland v2, v3, v2 +; CHECK-NEXT: xxlor v2, v3, v2 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test66: @@ -2090,7 +2090,7 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) { ; CHECK-REG-NEXT: lxvw4x v3, 0, r3 ; CHECK-REG-NEXT: xxlnor v2, v2, v2 ; CHECK-REG-NEXT: vperm v3, v2, v2, v3 -; CHECK-REG-NEXT: xxland v2, v3, v2 +; CHECK-REG-NEXT: xxlor v2, v3, v2 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test66: @@ -2101,7 +2101,7 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) { ; CHECK-FISL-NEXT: addi r3, r3, .LCPI60_0@toc@l ; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 ; CHECK-FISL-NEXT: vperm v2, v3, v3, v2 -; CHECK-FISL-NEXT: xxland v2, v2, v3 +; CHECK-FISL-NEXT: xxlor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test66: -- 2.7.4