KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
- // TOOD: Until we have a plan for how to represent demanded elements for
- // scalable vectors, we can just bail out for now.
- if (Op.getValueType().isScalableVector()) {
- unsigned BitWidth = Op.getScalarValueSizeInBits();
- return KnownBits(BitWidth);
- }
-
- APInt DemandedElts = VT.isVector()
+ // Since the number of lanes in a scalable vector is unknown at compile time,
+ // we track one bit which is implicitly broadcast to all lanes. This means
+ // that all lanes in a scalable vector are considered demanded.
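+ // For example, a scalable <vscale x 4 x i32> is tracked as APInt(1, 1),
+ // while a fixed <4 x i32> uses APInt::getAllOnes(4) == 0b1111.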
+ APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return computeKnownBits(Op, DemandedElts, Depth);
KnownBits Known(BitWidth); // Don't know anything.
- // TOOD: Until we have a plan for how to represent demanded elements for
- // scalable vectors, we can just bail out for now.
- if (Op.getValueType().isScalableVector())
- return Known;
-
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// We know all of the bits for a constant!
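+ // e.g. an i8 constant 42 gives One = 0b00101010 and Zero = 0b11010101.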
return KnownBits::makeConstant(C->getAPIntValue());
KnownBits Known2;
unsigned NumElts = DemandedElts.getBitWidth();
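+ // For scalars and scalable vectors this is 1: the single implicitly
+ // broadcast bit.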
- assert((!Op.getValueType().isVector() ||
+ assert((!Op.getValueType().isFixedLengthVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
"Unexpected vector size");
case ISD::MERGE_VALUES:
return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts,
Depth + 1);
+ case ISD::SPLAT_VECTOR: {
+ SDValue SrcOp = Op.getOperand(0);
+ Known = computeKnownBits(SrcOp, Depth + 1);
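+ // SPLAT_VECTOR permits the scalar operand to be wider than the element
+ // type (an implicit truncation), so narrow the result when needed.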
+ if (SrcOp.getValueSizeInBits() != BitWidth) {
+ assert(SrcOp.getValueSizeInBits() > BitWidth &&
+ "Expected SPLAT_VECTOR implicit truncation");
+ Known = Known.trunc(BitWidth);
+ }
+ break;
+ }
case ISD::BUILD_VECTOR:
+ assert(!Op.getValueType().isScalableVector());
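+ // (BUILD_VECTOR needs one operand per lane, so it can only produce
+ // fixed-length vectors; scalable splats use SPLAT_VECTOR above.)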
// Collect the known bits that are shared by every demanded vector element.
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
}
break;
case ISD::VECTOR_SHUFFLE: {
+ assert(!Op.getValueType().isScalableVector());
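+ // (Shuffle masks index individual lanes, so VECTOR_SHUFFLE is likewise
+ // fixed-length only.)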
// Collect the known bits that are shared by every vector element referenced
// by the shuffle.
APInt DemandedLHS, DemandedRHS;
break;
}
case ISD::CONCAT_VECTORS: {
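+ // Splitting DemandedElts across subvectors needs a known lane count, so
+ // conservatively leave Known fully unknown for scalable vectors.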
+ if (Op.getValueType().isScalableVector())
+ break;
// Split DemandedElts and test each of the demanded subvectors.
Known.Zero.setAllBits(); Known.One.setAllBits();
EVT SubVectorVT = Op.getOperand(0).getValueType();
break;
}
case ISD::INSERT_SUBVECTOR: {
+ if (Op.getValueType().isScalableVector())
+ break;
// Demand any elements from the subvector and the remainder from the src it's
// inserted into.
SDValue Src = Op.getOperand(0);
// Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
// Bail until we can represent demanded elements for scalable vectors.
- if (Src.getValueType().isScalableVector())
+ if (Op.getValueType().isScalableVector() ||
+     Src.getValueType().isScalableVector())
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
break;
}
case ISD::SCALAR_TO_VECTOR: {
+ if (Op.getValueType().isScalableVector())
+ break;
// We know about scalar_to_vector as much as we know about its source, which
// becomes the first element of an otherwise unknown vector.
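+ // Bail unless only lane 0 is demanded; the remaining lanes are undefined.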
if (DemandedElts != 1)
break;
}
case ISD::BITCAST: {
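+ // Tracking how bits map between lanes across a bitcast requires fixed
+ // element counts.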
+ if (Op.getValueType().isScalableVector())
+ break;
+
SDValue N0 = Op.getOperand(0);
EVT SubVT = N0.getValueType();
unsigned SubBitWidth = SubVT.getScalarSizeInBits();
if (ISD::isNON_EXTLoad(LD) && Cst) {
// Determine any common known bits from the loaded constant pool value.
Type *CstTy = Cst->getType();
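+ // The size check below is only meaningful when the load produces a
+ // fixed-size value.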
- if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
+ if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits() &&
+ !Op.getValueType().isScalableVector()) {
// If it's a vector splat, then we can (quickly) reuse the scalar path.
// NOTE: We assume all elements match and none are UNDEF.
if (CstTy->isVectorTy()) {
break;
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
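+ // DemandedElts must be widened to the input's lane count, which is only
+ // known for fixed-length vectors (likewise for the two cases below).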
+ if (Op.getValueType().isScalableVector())
+ break;
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
break;
}
case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
break;
}
case ISD::ANY_EXTEND_VECTOR_INREG: {
+ if (Op.getValueType().isScalableVector())
+ break;
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
break;
}
case ISD::INSERT_VECTOR_ELT: {
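+ // Splitting the demand at the insertion index requires fixed lane
+ // positions.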
+ if (Op.getValueType().isScalableVector())
+ break;
+
// If we know the element index, split the demand between the
// source vector and the inserted element, otherwise assume we need
// the original demanded vector elements and the value.
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
+ // TODO: Probably okay to remove after an audit; this is here to reduce the
+ // change size of the initial patch enabling scalable vector support.
+ if (Op.getValueType().isScalableVector())
+ break;
+
// Allow the target to implement this method for its nodes.
TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
break;
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z1.d, z1.d, #0xff
; CHECK-NEXT: and z0.d, z0.d, #0xff
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: lsr z1.d, z2.d, #8
-; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: lsr z1.d, z0.d, #8
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y)
%b = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 0
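+; Since the inputs are masked to 8 bits, known bits now prove the high half
+; of the widened product is zero, so the separate umulh-based overflow check
+; folds away (the same improvement applies to the tests below).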
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z1.s, z1.s, #0xff
; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: lsr z1.s, z2.s, #8
-; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsr z1.s, z0.s, #8
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y)
%b = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: and z0.h, z0.h, #0xff
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: lsr z1.h, z2.h, #8
-; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsr z1.h, z0.h, #8
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
-; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT: mov z2.h, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
%b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z1.d, z1.d, #0xffff
; CHECK-NEXT: and z0.d, z0.d, #0xffff
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: lsr z1.d, z2.d, #16
-; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: lsr z1.d, z0.d, #16
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y)
%b = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z1.s, z1.s, #0xffff
; CHECK-NEXT: and z0.s, z0.s, #0xffff
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: lsr z1.s, z2.s, #16
-; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsr z1.s, z0.s, #16
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y)
%b = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: lsr z1.d, z2.d, #32
-; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: lsr z1.d, z0.d, #32
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
%b = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 0