For some targets, it is desirable to prefer scalarizing <N x i1> instead of promoting to a larger legal type, such as <N x i32>.
llvm-svn: 168882
virtual bool isSelectSupported(SelectSupportKind kind) const { return true; }
+ /// shouldSplitVectorElementType - Return true if a vector whose element type
+ /// is VT should be split (TypeSplitVector) rather than have its elements
+ /// promoted (TypePromoteInteger) during type legalization. Defaults to false.
+ virtual bool shouldSplitVectorElementType(EVT VT) const { return false; }
+
/// isIntDivCheap() - Return true if integer divide is usually cheaper than
/// a sequence of several shifts, adds, and multiplies for this target.
bool isIntDivCheap() const { return IntDivIsCheap; }
// that wider vector type.
EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
- if (NElts != 1) {
+ if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
bool IsLegalWiderType = false;
// First try to promote the elements of integer vectors. If no legal
// promotion was found, fallback to the widen-vector method.
}
}
+bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
+ return VT == MVT::i1; // only vectors with i1 elements are split instead of promoted
+}
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
virtual unsigned getFunctionAlignment(const Function *F) const;
virtual EVT getSetCCResultType(EVT VT) const {
+ if (VT.isVector()) // vector operand: result is a vector of i1 with the same element count
+ return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
return MVT::i1; // non-vector operand: result is a single i1
}
return MVT::i32;
}
+ virtual bool shouldSplitVectorElementType(EVT VT) const;
+
private:
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
--- /dev/null
+; RUN: llc < %s -march=nvptx -mcpu=sm_20
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20
+
+; This test makes sure that the results of vector compares are properly
+; scalarized. If codegen fails, the type legalizer has incorrectly tried to
+; promote <2 x i1> to <2 x i8>, causing instruction selection to fail.
+
+define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) {
+ %aval = load <2 x i32>* %a
+ %bval = load <2 x i32>* %b
+ %res = icmp slt <2 x i32> %aval, %bval ; element-wise signed less-than, yields <2 x i1>
+ %t1 = extractelement <2 x i1> %res, i32 0 ; element 0 of the compare result
+ %t2 = extractelement <2 x i1> %res, i32 1 ; element 1 of the compare result
+ %t1a = zext i1 %t1 to i32 ; zero-extend each i1 to i32 before storing it
+ %t2a = zext i1 %t2 to i32
+ store i32 %t1a, i32* %r1
+ store i32 %t2a, i32* %r2
+ ret void
+}