static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
-static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl,
- unsigned vectorWidth) {
- assert((vectorWidth == 128 || vectorWidth == 256) &&
- "Unsupported vector width");
- EVT VT = Vec.getValueType();
- EVT ElVT = VT.getVectorElementType();
- unsigned Factor = VT.getSizeInBits()/vectorWidth;
- EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
- VT.getVectorNumElements()/Factor);
-
- // Extract from UNDEF is UNDEF.
- if (Vec.getOpcode() == ISD::UNDEF)
- return DAG.getUNDEF(ResultVT);
-
- // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
- unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
-
- // This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
- * ElemsPerChunk);
-
- // If the input is a buildvector just emit a smaller one.
- if (Vec.getOpcode() == ISD::BUILD_VECTOR)
- return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
- makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
- ElemsPerChunk));
-
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
-}
-
-/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
-/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
-/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
-/// instructions or a simple subregister reference. Idx is an index in the
-/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
-/// lowering EXTRACT_VECTOR_ELT operations easier.
-static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert((Vec.getValueType().is256BitVector() ||
- Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
- return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
-}
-
-/// Generate a DAG to grab 256-bits from a 512-bit vector.
-static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
- return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
-}
-
-static SDValue InsertSubVector(SDValue Result, SDValue Vec,
- unsigned IdxVal, SelectionDAG &DAG,
- SDLoc dl, unsigned vectorWidth) {
- assert((vectorWidth == 128 || vectorWidth == 256) &&
- "Unsupported vector width");
- // Inserting UNDEF is Result
- if (Vec.getOpcode() == ISD::UNDEF)
- return Result;
- EVT VT = Vec.getValueType();
- EVT ElVT = VT.getVectorElementType();
- EVT ResultVT = Result.getValueType();
-
- // Insert the relevant vectorWidth bits.
- unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
-
- // This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
- * ElemsPerChunk);
-
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
-}
-
-/// Generate a DAG to put 128-bits into a vector > 128 bits. This
-/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
-/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
-/// simple superregister reference. Idx is an index in the 128 bits
-/// we want. It need not be aligned to a 128-bit boundary. That makes
-/// lowering INSERT_VECTOR_ELT operations easier.
-static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
-
- // For insertion into the zero index (low half) of a 256-bit vector, it is
- // more efficient to generate a blend with immediate instead of an insert*128.
- // We are still creating an INSERT_SUBVECTOR below with an undef node to
- // extend the subvector to the size of the result vector. Make sure that
- // we are not recursing on that node by checking for undef here.
- if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
- Result.getOpcode() != ISD::UNDEF) {
- EVT ResultVT = Result.getValueType();
- SDValue ZeroIndex = DAG.getIntPtrConstant(0);
- SDValue Undef = DAG.getUNDEF(ResultVT);
- SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
- Vec, ZeroIndex);
-
- // The blend instruction, and therefore its mask, depend on the data type.
- MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
- if (ScalarType.isFloatingPoint()) {
- // Choose either vblendps (float) or vblendpd (double).
- unsigned ScalarSize = ScalarType.getSizeInBits();
- assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
- unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
- SDValue Mask = DAG.getConstant(MaskVal, MVT::i8);
- return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
- }
-
- const X86Subtarget &Subtarget =
- static_cast<const X86Subtarget &>(DAG.getSubtarget());
-
- // AVX2 is needed for 256-bit integer blend support.
- // Integers must be cast to 32-bit because there is only vpblendd;
- // vpblendw can't be used for this because it has a handicapped mask.
-
- // If we don't have AVX2, then cast to float. Using a wrong domain blend
- // is still more efficient than using the wrong domain vinsertf128 that
- // will be created by InsertSubVector().
- MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
-
- SDValue Mask = DAG.getConstant(0x0f, MVT::i8);
- Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
- Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
- return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
- }
-
- return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
-}
-
-static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
- return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
-}
-
-/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
-/// instructions. This is used because creating CONCAT_VECTOR nodes of
-/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
-/// large BUILD_VECTORS.
-static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- SDLoc dl) {
- SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
- return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
-}
-
-static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- SDLoc dl) {
- SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
- return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
-}
-
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
+static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl,
+ unsigned vectorWidth) {
+ assert((vectorWidth == 128 || vectorWidth == 256) &&
+ "Unsupported vector width");
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ unsigned Factor = VT.getSizeInBits()/vectorWidth;
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
+ VT.getVectorNumElements()/Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(ResultVT);
+
+ // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
+ unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the vectorWidth-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
+ * ElemsPerChunk);
+
+ // If the input is a buildvector just emit a smaller one.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+ makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
+ ElemsPerChunk));
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
+}
+
+/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
+/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
+/// instructions or a simple subregister reference. Idx is an index in the
+/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert((Vec.getValueType().is256BitVector() ||
+ Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
+ return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
+}
+
+/// Generate a DAG to grab 256-bits from a 512-bit vector.
+static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
+ return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
+}
+
+static SDValue InsertSubVector(SDValue Result, SDValue Vec,
+ unsigned IdxVal, SelectionDAG &DAG,
+ SDLoc dl, unsigned vectorWidth) {
+ assert((vectorWidth == 128 || vectorWidth == 256) &&
+ "Unsupported vector width");
+ // Inserting UNDEF is Result
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return Result;
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant vectorWidth bits.
+ unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+
+ // This is the index of the first element of the vectorWidth-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
+}
+
+/// Generate a DAG to put 128-bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
+/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
+
+ // For insertion into the zero index (low half) of a 256-bit vector, it is
+ // more efficient to generate a blend with immediate instead of an insert*128.
+ // We are still creating an INSERT_SUBVECTOR below with an undef node to
+ // extend the subvector to the size of the result vector. Make sure that
+ // we are not recursing on that node by checking for undef here.
+ if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
+ Result.getOpcode() != ISD::UNDEF) {
+ EVT ResultVT = Result.getValueType();
+ SDValue ZeroIndex = DAG.getIntPtrConstant(0);
+ SDValue Undef = DAG.getUNDEF(ResultVT);
+ SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
+ Vec, ZeroIndex);
+
+ // The blend instruction, and therefore its mask, depend on the data type.
+ MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
+ if (ScalarType.isFloatingPoint()) {
+ // Choose either vblendps (float) or vblendpd (double).
+ unsigned ScalarSize = ScalarType.getSizeInBits();
+ assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
+ unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
+ SDValue Mask = DAG.getConstant(MaskVal, MVT::i8);
+ return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
+ }
+
+ const X86Subtarget &Subtarget =
+ static_cast<const X86Subtarget &>(DAG.getSubtarget());
+
+ // AVX2 is needed for 256-bit integer blend support.
+ // Integers must be cast to 32-bit because there is only vpblendd;
+ // vpblendw can't be used for this because it has a handicapped mask.
+
+ // If we don't have AVX2, then cast to float. Using a wrong domain blend
+ // is still more efficient than using the wrong domain vinsertf128 that
+ // will be created by InsertSubVector().
+ MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
+
+ SDValue Mask = DAG.getConstant(0x0f, MVT::i8);
+ Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
+ Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
+ return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
+ }
+
+ return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
+}
+
+static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
+ return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
+}
+
+/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
+/// instructions. This is used because creating CONCAT_VECTOR nodes of
+/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
+/// large BUILD_VECTORS.
+static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
+}
+
+static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
+}
+
/// getOnesVector - Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 supprt, use two <4 x i32> inserted in a <8 x i32> appropriately.