  EVT OpTy = N->getOperand(1).getValueType();
  if (N->getOpcode() == ISD::VSELECT)
-    if (SDValue Res = WidenVSELECTAndMask(N))
-      return Res;
+    if (SDValue Res = WidenVSELECTMask(N))
+      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+                         Res, N->getOperand(1), N->getOperand(2));
  // Promote all the way up to the canonical SetCC type.
  EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
  SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
  SDValue WidenVecRes_ScalarOp(SDNode* N);
  SDValue WidenVecRes_SELECT(SDNode* N);
-  SDValue WidenVSELECTAndMask(SDNode *N);
+  SDValue WidenVSELECTMask(SDNode *N);
  SDValue WidenVecRes_SELECT_CC(SDNode* N);
  SDValue WidenVecRes_SETCC(SDNode* N);
  SDValue WidenVecRes_STRICT_FSETCC(SDNode* N);
  SDValue Cond = N->getOperand(0);
  CL = CH = Cond;
  if (Cond.getValueType().isVector()) {
-    if (SDValue Res = WidenVSELECTAndMask(N))
-      std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
+    if (SDValue Res = WidenVSELECTMask(N))
+      std::tie(CL, CH) = DAG.SplitVector(Res, dl);
    // Check if there are already split versions of the vector available and
    // use those instead of splitting the mask operand again.
    else if (getTypeAction(Cond.getValueType()) ==
  return Mask;
}
-// This method tries to handle VSELECT and its mask by legalizing operands
-// (which may require widening) and if needed adjusting the mask vector type
-// to match that of the VSELECT. Without it, many cases end up with
-// scalarization of the SETCC, with many unnecessary instructions.
-SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
+// This method tries to handle some special cases for the vselect mask,
+// adjusting the mask vector type to match that of the VSELECT if needed.
+// Without it, many cases end up with scalarization of the SETCC, with many
+// unnecessary instructions.
+SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
  LLVMContext &Ctx = *DAG.getContext();
  SDValue Cond = N->getOperand(0);
      return SDValue();
  }
-  // Get the VT and operands for VSELECT, and widen if needed.
-  SDValue VSelOp1 = N->getOperand(1);
-  SDValue VSelOp2 = N->getOperand(2);
-  if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) {
+  // Widen the vselect result type if needed.
+  if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector)
    VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);
-    VSelOp1 = GetWidenedVector(VSelOp1);
-    VSelOp2 = GetWidenedVector(VSelOp2);
-  }
  // The mask of the VSELECT should have integer elements.
  EVT ToMaskVT = VSelVT;
  } else
    return SDValue();
-  return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2);
+  return Mask;
}
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
  SDValue Cond1 = N->getOperand(0);
  EVT CondVT = Cond1.getValueType();
  if (CondVT.isVector()) {
-    if (SDValue Res = WidenVSELECTAndMask(N))
-      return Res;
+    if (SDValue WideCond = WidenVSELECTMask(N)) {
+      SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+      SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+      assert(InOp1.getValueType() == WidenVT &&
+             InOp2.getValueType() == WidenVT);
+      return DAG.getNode(N->getOpcode(), SDLoc(N),
+                         WidenVT, WideCond, InOp1, InOp2);
+    }
    EVT CondEltVT = CondVT.getVectorElementType();
    EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -O3 | FileCheck %s
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+
+@g_150 = external dso_local unnamed_addr global [9 x i32], align 4
+@g_317 = external dso_local unnamed_addr global [1 x [10 x [8 x i32]]], align 4
+
+define dso_local void @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: lhi %r0, 0
+; CHECK-NEXT: strl %r0, g_317+296
+; CHECK-NEXT: lhi %r0, 6
+; CHECK-NEXT: strl %r0, g_150+12
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ br i1 undef, label %bb2, label %bb1
+
+bb2: ; preds = %bb1
+ store i32 0, i32* getelementptr inbounds ([1 x [10 x [8 x i32]]], [1 x [10 x [8 x i32]]]* @g_317, i64 0, i64 0, i64 9, i64 2), align 4
+ %i = load i32, i32* getelementptr inbounds ([1 x [10 x [8 x i32]]], [1 x [10 x [8 x i32]]]* @g_317, i64 0, i64 0, i64 9, i64 2), align 4
+ %i3 = insertelement <8 x i32> undef, i32 %i, i32 0
+ %i4 = shufflevector <8 x i32> %i3, <8 x i32> undef, <8 x i32> zeroinitializer
+ %i5 = add nsw <8 x i8> zeroinitializer, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
+ %i6 = zext <8 x i8> %i5 to <8 x i32>
+ %i7 = icmp slt <8 x i32> undef, %i6
+ %i8 = or <8 x i1> zeroinitializer, %i7
+ %i9 = select <8 x i1> %i8, <8 x i32> zeroinitializer, <8 x i32> %i4
+ %i10 = shl <8 x i32> %i6, %i9
+ %i11 = xor <8 x i32> %i10, zeroinitializer
+ %i12 = xor <8 x i32> %i11, zeroinitializer
+ %i13 = xor <8 x i32> %i12, zeroinitializer
+ %i14 = extractelement <8 x i32> %i13, i32 0
+ %i15 = xor i32 %i14, 0
+ %i16 = xor i32 %i15, 0
+ %i17 = shl i32 %i16, 24
+ %i18 = ashr exact i32 %i17, 24
+ store i32 %i18, i32* getelementptr inbounds ([9 x i32], [9 x i32]* @g_150, i64 0, i64 3), align 4
+ unreachable
+}
+
+attributes #0 = { "use-soft-float"="false" }