From 40c9559b74c709506b16d808b9806b04112a5038 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 18 Aug 2018 18:51:03 +0000 Subject: [PATCH] [X86] Add support for using 512-bit PSUBUS to combineSelect. The code already supports 128 and 256 and even knows to split 256 for AVX1. So we really just needed to stop looking for specific VTs and subtarget features and just look for legal VTs with i8/i16 elements. While there, add some curly braces around outer if statement bodies that contain only another if. It makes all the closing curly braces look more regular. llvm-svn: 340128 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 ++++++++----- llvm/test/CodeGen/X86/psubus.ll | 6 ++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 51151d5..112e9f9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33029,9 +33029,10 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Match VSELECTs into subs with unsigned saturation. if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && - // psubus is available in SSE2 and AVX2 for i8 and i16 vectors. - ((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) || - (Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) { + // psubus is available in SSE2 for i8 and i16 vectors. + Subtarget.hasSSE2() && + (VT.getVectorElementType() == MVT::i8 || + VT.getVectorElementType() == MVT::i16)) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); // Check if one of the arms of the VSELECT is a zero vector. 
If it's on the @@ -33062,7 +33063,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, SUBUSBuilder); - if (auto *OpRHSBV = dyn_cast(OpRHS)) + if (auto *OpRHSBV = dyn_cast(OpRHS)) { if (isa(CondRHS)) { // If the RHS is a constant we have to reverse the const // canonicalization. @@ -33083,7 +33084,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // FIXME: Would it be better to use computeKnownBits to determine // whether it's safe to decanonicalize the xor? // x s< 0 ? x^C : 0 --> subus x, C - if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) + if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) { if (CC == ISD::SETLT && Other.getOpcode() == ISD::XOR && ISD::isBuildVectorAllZeros(CondRHS.getNode()) && OpRHSConst->getAPIntValue().isSignMask()) { @@ -33093,7 +33094,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, SUBUSBuilder); } + } } + } } } diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll index e88a680..72e23a8 100644 --- a/llvm/test/CodeGen/X86/psubus.ll +++ b/llvm/test/CodeGen/X86/psubus.ll @@ -1186,8 +1186,7 @@ define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind { ; AVX512-LABEL: test17: ; AVX512: # %bb.0: # %vector.ph ; AVX512-NEXT: vpbroadcastb %edi, %zmm1 -; AVX512-NEXT: vpcmpnltub %zmm1, %zmm0, %k1 -; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: vpsubusb %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: retq vector.ph: %0 = insertelement <64 x i8> undef, i8 %w, i32 0 @@ -1236,8 +1235,7 @@ define <32 x i16> @test18(<32 x i16> %x, i16 zeroext %w) nounwind { ; AVX512-LABEL: test18: ; AVX512: # %bb.0: # %vector.ph ; AVX512-NEXT: vpbroadcastw %edi, %zmm1 -; AVX512-NEXT: vpcmpnltuw %zmm1, %zmm0, %k1 -; AVX512-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: retq vector.ph: %0 = 
insertelement <32 x i16> undef, i16 %w, i32 0 -- 2.7.4