From: Nemanja Ivanovic Date: Tue, 21 Dec 2021 20:28:41 +0000 (-0600) Subject: [PowerPC] Fix vector equality comparison for v2i64 pre-Power8 X-Git-Tag: upstream/15.0.7~22443 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1674d9b6b2da914619c7c197336bb74f7988cf38;p=platform%2Fupstream%2Fllvm.git [PowerPC] Fix vector equality comparison for v2i64 pre-Power8 The current code makes the assumption that equality comparison can be performed with a word comparison instruction. While this is true if the entire 64-bit results are used, it does not generally work. It is possible that the low order words and high order words produce different results and a user of only one will get the wrong result. This patch adds an and of the result words so that each word has the result of the comparison of the entire doubleword that contains it. Differential revision: https://reviews.llvm.org/D115678 --- diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d6ee6a9..8d6edf0 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3500,15 +3500,16 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (LHS.getValueType() == MVT::v2i64) { // Equality can be handled by casting to the legal type for Altivec // comparisons, everything else needs to be expanded. 
- if (CC == ISD::SETEQ || CC == ISD::SETNE) { - return DAG.getNode( - ISD::BITCAST, dl, MVT::v2i64, - DAG.getSetCC(dl, MVT::v4i32, - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS), - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC)); - } - - return SDValue(); + if (CC != ISD::SETEQ && CC != ISD::SETNE) + return SDValue(); + SDValue SetCC32 = DAG.getSetCC( + dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS), + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC); + int ShuffV[] = {1, 0, 3, 2}; + SDValue Shuff = + DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV); + // SETEQ: both words of a doubleword must be equal, so AND each word result with its swapped neighbour; SETNE: a difference in either word suffices, so OR them. + return DAG.getBitcast(MVT::v2i64, DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR, dl, MVT::v4i32, Shuff, SetCC32)); } // We handle most of these in the usual way. diff --git a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll new file mode 100644 index 0000000..b63b142 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc-aix- < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64le-- < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK_LE +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-- < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK_P8LE +define i1 @foo(<2 x i64> %a) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 35, 34 +; CHECK-NEXT: lwz 3, L..C0(2) # %const.0 +; CHECK-NEXT: vcmpequw 2, 2, 3 +; CHECK-NEXT: lxvw4x 35, 0, 3 +; CHECK-NEXT: addi 3, 1, -16 +; CHECK-NEXT: vperm 3, 2, 2, 3 +; CHECK-NEXT: xxland 0, 35, 34 +; CHECK-NEXT: stxvw4x 0, 0, 3 +; CHECK-NEXT: lwz 3, -12(1) +; CHECK-NEXT: blr +; +; CHECK_LE-LABEL: foo: +; CHECK_LE: # %bb.0: # %entry +; CHECK_LE-NEXT: xxswapd 35, 34 +; CHECK_LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK_LE-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK_LE-NEXT: 
vcmpequw 2, 2, 3 +; CHECK_LE-NEXT: lvx 3, 0, 3 +; CHECK_LE-NEXT: addi 3, 1, -16 +; CHECK_LE-NEXT: vperm 3, 2, 2, 3 +; CHECK_LE-NEXT: xxland 34, 35, 34 +; CHECK_LE-NEXT: stvx 2, 0, 3 +; CHECK_LE-NEXT: ld 3, -16(1) +; CHECK_LE-NEXT: blr +; +; CHECK_P8LE-LABEL: foo: +; CHECK_P8LE: # %bb.0: # %entry +; CHECK_P8LE-NEXT: xxswapd 35, 34 +; CHECK_P8LE-NEXT: vcmpequd 2, 2, 3 +; CHECK_P8LE-NEXT: xxswapd 0, 34 +; CHECK_P8LE-NEXT: mffprd 3, 0 +; CHECK_P8LE-NEXT: blr +entry: + %0 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0> + %1 = icmp eq <2 x i64> %a, %0 + %2 = extractelement <2 x i1> %1, i32 0 + ret i1 %2 +}