V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
- assert((VT.getSizeInBits() % Mask.size()) == 0 &&
+ assert((VT.getSizeInBits() % Size) == 0 &&
"Illegal split of shuffle value type");
unsigned EltSizeInBits = VT.getSizeInBits() / Size;
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.
-static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
- SDValue V1, SDValue V2) {
- APInt Zeroable(Mask.size(), 0);
+static void computeZeroableShuffleElements(ArrayRef<int> Mask,
+ SDValue V1, SDValue V2,
+ APInt &KnownUndef, APInt &KnownZero) {
+ int Size = Mask.size();
+ KnownUndef = KnownZero = APInt::getNullValue(Size);
+
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
int VectorSizeInBits = V1.getValueSizeInBits();
- int ScalarSizeInBits = VectorSizeInBits / Mask.size();
+ int ScalarSizeInBits = VectorSizeInBits / Size;
assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ for (int i = 0; i < Size; ++i) {
int M = Mask[i];
// Handle the easy cases.
- if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
- Zeroable.setBit(i);
+ if (M < 0) {
+ KnownUndef.setBit(i);
+ continue;
+ }
+ if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
+ KnownZero.setBit(i);
continue;
}
if ((Size % V.getNumOperands()) == 0) {
int Scale = Size / V->getNumOperands();
SDValue Op = V.getOperand(M / Scale);
- if (Op.isUndef() || X86::isZeroNode(Op))
- Zeroable.setBit(i);
+ if (Op.isUndef())
+ KnownUndef.setBit(i);
+ if (X86::isZeroNode(Op))
+ KnownZero.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue();
- Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
- Val = Val.getLoBits(ScalarSizeInBits);
+ Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
if (Val == 0)
- Zeroable.setBit(i);
+ KnownZero.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt();
- Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
- Val = Val.getLoBits(ScalarSizeInBits);
+ Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
if (Val == 0)
- Zeroable.setBit(i);
+ KnownZero.setBit(i);
}
continue;
}
// elements must be UNDEF or ZERO.
if ((V.getNumOperands() % Size) == 0) {
int Scale = V->getNumOperands() / Size;
- bool AllZeroable = true;
+ bool AllUndef = true;
+ bool AllZero = true;
for (int j = 0; j < Scale; ++j) {
SDValue Op = V.getOperand((M * Scale) + j);
- AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
+ AllUndef &= Op.isUndef();
+ AllZero &= X86::isZeroNode(Op);
}
- if (AllZeroable)
- Zeroable.setBit(i);
+ if (AllUndef)
+ KnownUndef.setBit(i);
+ if (AllZero)
+ KnownZero.setBit(i);
continue;
}
}
-
- return Zeroable;
}
// The Shuffle result is as follow:
// We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros.
- APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2);
+ APInt KnownUndef, KnownZero;
+ computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero);
+
+ APInt Zeroable = KnownUndef | KnownZero;
if (Zeroable.isAllOnesValue())
return getZeroVector(VT, Subtarget, DAG, DL);