From 3802c4af596d1e708a999ddb3e965e9b60820ad6 Mon Sep 17 00:00:00 2001 From: "Kevin B. Smith" Date: Thu, 7 Apr 2016 16:15:34 +0000 Subject: [PATCH] [X86]: Fix for PR27251. Differential Revision: http://reviews.llvm.org/D18850 llvm-svn: 265690 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 21 ++++++++++++++++++--- llvm/test/CodeGen/X86/vector-blend.ll | 6 ++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 836caa9..aa4cbd4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -27338,9 +27338,24 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, if (V) { assert(EltBits == 8 || EltBits == 16 || EltBits == 32); - return DAG.getBitcast( - VT, DAG.getNode(ISD::SUB, DL, MaskVT, - DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask), Mask)); + SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask); + SDValue SubOp2 = Mask; + + // If the negate was on the false side of the select, then + // the operands of the SUB need to be swapped. PR 27251. + // This is because the pattern being matched above is + // (vselect M, (sub (0, X)), X) -> (sub (xor X, M), M) + // but if the pattern matched was + // (vselect M, X, (sub (0, X))), that is really negation of the pattern + // above, -(vselect M, (sub 0, X), X), and therefore the replacement + // pattern also needs to be a negation of the replacement pattern above. + // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the + // sub accomplishes the negation of the replacement pattern. 
+ if (V == Y) + std::swap(SubOp1, SubOp2); + + return DAG.getBitcast(VT, + DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2)); } } diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll index 1de6805..3e00612 100644 --- a/llvm/test/CodeGen/X86/vector-blend.ll +++ b/llvm/test/CodeGen/X86/vector-blend.ll @@ -1010,7 +1010,8 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) { ; SSE2-NEXT: pslld $31, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 ; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: psubd %xmm1, %xmm0 +; SSE2-NEXT: psubd %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: blend_neg_logic_v4i32_2: @@ -1019,7 +1020,8 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) { ; SSSE3-NEXT: pslld $31, %xmm1 ; SSSE3-NEXT: psrad $31, %xmm1 ; SSSE3-NEXT: pxor %xmm1, %xmm0 -; SSSE3-NEXT: psubd %xmm1, %xmm0 +; SSSE3-NEXT: psubd %xmm0, %xmm1 +; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: blend_neg_logic_v4i32_2: -- 2.7.4