const EVT VecT = Op.getValueType();
const EVT LaneT = Op.getOperand(0).getValueType();
const size_t Lanes = Op.getNumOperands();
+ bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8;
+
+ // BUILD_VECTORs are lowered to the instruction that initializes the highest
+ // possible number of lanes at once followed by a sequence of replace_lane
+ // instructions to individually initialize any remaining lanes.
+
+ // TODO: Tune this. For example, lanewise swizzling is very expensive, so
+ // swizzled lanes should be given greater weight.
+
+ // TODO: Investigate building vectors by shuffling together vectors built by
+ // separately specialized means.
+
auto IsConstant = [](const SDValue &V) {
return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
};
- // Find the most common operand, which is approximately the best to splat
- using Entry = std::pair<SDValue, size_t>;
- SmallVector<Entry, 16> ValueCounts;
- size_t NumConst = 0, NumDynamic = 0;
- for (const SDValue &Lane : Op->op_values()) {
- if (Lane.isUndef()) {
- continue;
- } else if (IsConstant(Lane)) {
- NumConst++;
- } else {
- NumDynamic++;
- }
- auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(),
- [&Lane](Entry A) { return A.first == Lane; });
- if (CountIt == ValueCounts.end()) {
- ValueCounts.emplace_back(Lane, 1);
+ // Returns the source vector and index vector pair if they exist. Checks for:
+ // (extract_vector_elt
+ // $src,
+ // (sign_extend_inreg (extract_vector_elt $indices, $i))
+ // )
+ auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
+ auto Bail = std::make_pair(SDValue(), SDValue());
+ if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return Bail;
+ const SDValue &SwizzleSrc = Lane->getOperand(0);
+ const SDValue &IndexExt = Lane->getOperand(1);
+ if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
+ return Bail;
+ const SDValue &Index = IndexExt->getOperand(0);
+ if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return Bail;
+ const SDValue &SwizzleIndices = Index->getOperand(0);
+ if (SwizzleSrc.getValueType() != MVT::v16i8 ||
+ SwizzleIndices.getValueType() != MVT::v16i8 ||
+ Index->getOperand(1)->getOpcode() != ISD::Constant ||
+ Index->getConstantOperandVal(1) != I)
+ return Bail;
+ return std::make_pair(SwizzleSrc, SwizzleIndices);
+ };
+
+ using ValueEntry = std::pair<SDValue, size_t>;
+ SmallVector<ValueEntry, 16> SplatValueCounts;
+
+ using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
+ SmallVector<SwizzleEntry, 16> SwizzleCounts;
+
+ auto AddCount = [](auto &Counts, const auto &Val) {
+ auto CountIt = std::find_if(Counts.begin(), Counts.end(),
+ [&Val](auto E) { return E.first == Val; });
+ if (CountIt == Counts.end()) {
+ Counts.emplace_back(Val, 1);
} else {
CountIt->second++;
}
+ };
+
+ auto GetMostCommon = [](auto &Counts) {
+ auto CommonIt =
+ std::max_element(Counts.begin(), Counts.end(),
+ [](auto A, auto B) { return A.second < B.second; });
+ assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
+ return *CommonIt;
+ };
+
+ size_t NumConstantLanes = 0;
+
+ // Count eligible lanes for each type of vector creation op
+ for (size_t I = 0; I < Lanes; ++I) {
+ const SDValue &Lane = Op->getOperand(I);
+ if (Lane.isUndef())
+ continue;
+
+ AddCount(SplatValueCounts, Lane);
+
+ if (IsConstant(Lane)) {
+ NumConstantLanes++;
+ } else if (CanSwizzle) {
+ auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
+ if (SwizzleSrcs.first)
+ AddCount(SwizzleCounts, SwizzleSrcs);
+ }
}
- auto CommonIt =
- std::max_element(ValueCounts.begin(), ValueCounts.end(),
- [](Entry A, Entry B) { return A.second < B.second; });
- assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector");
- SDValue SplatValue = CommonIt->first;
- size_t NumCommon = CommonIt->second;
-
- // If v128.const is available, consider using it instead of a splat
+
+ SDValue SplatValue;
+ size_t NumSplatLanes;
+ std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
+
+ SDValue SwizzleSrc;
+ SDValue SwizzleIndices;
+ size_t NumSwizzleLanes = 0;
+ if (SwizzleCounts.size())
+ std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
+ NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
+
+ // Predicate returning true if the lane is already correctly initialized by
+ // the initial vector-creating instruction (swizzle, vector const, or splat)
+ std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
+ SDValue Result;
if (Subtarget->hasUnimplementedSIMD128()) {
- // {i32,i64,f32,f64}.const opcode, and value
- const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes);
- // SIMD prefix and opcode
- const size_t SplatBytes = 2;
- const size_t SplatConstBytes = SplatBytes + ConstBytes;
- // SIMD prefix, opcode, and lane index
- const size_t ReplaceBytes = 3;
- const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;
- // SIMD prefix, v128.const opcode, and 128-bit value
- const size_t VecConstBytes = 18;
- // Initial v128.const and a replace_lane for each non-const operand
- const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
- // Initial splat and all necessary replace_lanes
- const size_t SplatInitBytes =
- IsConstant(SplatValue)
- // Initial constant splat
- ? (SplatConstBytes +
- // Constant replace_lanes
- (NumConst - NumCommon) * ReplaceConstBytes +
- // Dynamic replace_lanes
- (NumDynamic * ReplaceBytes))
- // Initial dynamic splat
- : (SplatBytes +
- // Constant replace_lanes
- (NumConst * ReplaceConstBytes) +
- // Dynamic replace_lanes
- (NumDynamic - NumCommon) * ReplaceBytes);
- if (ConstInitBytes < SplatInitBytes) {
- // Create build_vector that will lower to initial v128.const
+ // Prefer swizzles over vector consts over splats
+ if (NumSwizzleLanes >= NumSplatLanes &&
+ NumSwizzleLanes >= NumConstantLanes) {
+ Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
+ SwizzleIndices);
+ auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
+ IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
+ return Swizzled == GetSwizzleSrcs(I, Lane);
+ };
+ } else if (NumConstantLanes >= NumSplatLanes) {
SmallVector<SDValue, 16> ConstLanes;
for (const SDValue &Lane : Op->op_values()) {
if (IsConstant(Lane)) {
ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
}
}
- SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes);
- // Add replace_lane instructions for non-const lanes
- for (size_t I = 0; I < Lanes; ++I) {
- const SDValue &Lane = Op->getOperand(I);
- if (!Lane.isUndef() && !IsConstant(Lane))
- Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
- DAG.getConstant(I, DL, MVT::i32));
- }
- return Result;
+ Result = DAG.getBuildVector(VecT, DL, ConstLanes);
+ IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+ return IsConstant(Lane);
+ };
}
}
- // Use a splat for the initial vector
- SDValue Result;
- // Possibly a load_splat
- LoadSDNode *SplattedLoad;
- if (Subtarget->hasUnimplementedSIMD128() &&
- (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
- SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
- Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue);
- } else {
- Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
+ if (!Result) {
+ // Use a splat, but possibly a load_splat
+ LoadSDNode *SplattedLoad;
+ if (Subtarget->hasUnimplementedSIMD128() &&
+ (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
+ SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
+ Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue);
+ } else {
+ Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
+ }
+ IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+ return Lane == SplatValue;
+ };
}
- // Add replace_lane instructions for other values
+
+ // Add replace_lane instructions for any unhandled values
for (size_t I = 0; I < Lanes; ++I) {
const SDValue &Lane = Op->getOperand(I);
- if (Lane != SplatValue)
+ if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
DAG.getConstant(I, DL, MVT::i32));
}
+
return Result;
}
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
-; CHECK-LABEL: same_const_one_replaced_i8x16:
-; CHECK-NEXT: .functype same_const_one_replaced_i8x16 (i32) -> (v128)
-; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 42
-; CHECK-NEXT: i16x8.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; CHECK-NEXT: i16x8.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 5, $0
-; CHECK-NEXT: return $pop[[L2]]
-define <8 x i16> @same_const_one_replaced_i8x16(i16 %x) {
+; CHECK-LABEL: same_const_one_replaced_i16x8:
+; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128)
+; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
+; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
+; CHECK-NEXT: return $pop[[L1]]
+define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
%v = insertelement
<8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
i16 %x,
ret <8 x i16> %v
}
-; CHECK-LABEL: different_const_one_replaced_i8x16:
-; CHECK-NEXT: .functype different_const_one_replaced_i8x16 (i32) -> (v128)
+; CHECK-LABEL: different_const_one_replaced_i16x8:
+; CHECK-NEXT: .functype different_const_one_replaced_i16x8 (i32) -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; CHECK-NEXT: return $pop[[L1]]
-define <8 x i16> @different_const_one_replaced_i8x16(i16 %x) {
+define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
%v = insertelement
<8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
i16 %x,
; CHECK-LABEL: same_const_one_replaced_f32x4:
; CHECK-NEXT: .functype same_const_one_replaced_f32x4 (f32) -> (v128)
-; CHECK-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.5p5
-; CHECK-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; CHECK-NEXT: f32x4.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
-; CHECK-NEXT: return $pop[[L2]]
+; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
+; CHECK-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
+; CHECK-NEXT: return $pop[[L1]]
define <4 x float> @same_const_one_replaced_f32x4(float %x) {
%v = insertelement
<4 x float> <float 42., float 42., float 42., float 42.>,
; CHECK-LABEL: splat_common_const_i32x4:
; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128)
-; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 3
-; CHECK-NEXT: i32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 1
-; CHECK-NEXT: i32x4.replace_lane $push[[L3:[0-9]+]]=, $pop[[L1]], 3, $pop[[L2]]
-; CHECK-NEXT: return $pop[[L3]]
+; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1
+; CHECK-NEXT: return $pop[[L0]]
define <4 x i32> @splat_common_const_i32x4() {
ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
}
ret <8 x i16> %v7
}
+; Builds a <16 x i8> whose only defined lane (lane 0) is %src indexed by
+; %mask[0]; all other lanes stay undef. The expected output is a single
+; v8x16.swizzle of the two arguments with no replace_lane afterwards.
+; CHECK-LABEL: swizzle_one_i8x16:
+; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
+; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
+; CHECK-NEXT: return $pop[[L0]]
+define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
+ %m0 = extractelement <16 x i8> %mask, i32 0
+ %s0 = extractelement <16 x i8> %src, i8 %m0
+ %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
+ ret <16 x i8> %v0
+}
+
+; Builds a <16 x i8> in which every lane i (0..15) is %src indexed by
+; %mask[i]. The expected output is a single v8x16.swizzle of the two
+; arguments, immediately followed by the return.
+; CHECK-LABEL: swizzle_all_i8x16:
+; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
+; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
+; CHECK-NEXT: return $pop[[L0]]
+define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
+ %m0 = extractelement <16 x i8> %mask, i32 0
+ %s0 = extractelement <16 x i8> %src, i8 %m0
+ %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
+ %m1 = extractelement <16 x i8> %mask, i32 1
+ %s1 = extractelement <16 x i8> %src, i8 %m1
+ %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
+ %m2 = extractelement <16 x i8> %mask, i32 2
+ %s2 = extractelement <16 x i8> %src, i8 %m2
+ %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
+ %m3 = extractelement <16 x i8> %mask, i32 3
+ %s3 = extractelement <16 x i8> %src, i8 %m3
+ %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
+ %m4 = extractelement <16 x i8> %mask, i32 4
+ %s4 = extractelement <16 x i8> %src, i8 %m4
+ %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
+ %m5 = extractelement <16 x i8> %mask, i32 5
+ %s5 = extractelement <16 x i8> %src, i8 %m5
+ %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
+ %m6 = extractelement <16 x i8> %mask, i32 6
+ %s6 = extractelement <16 x i8> %src, i8 %m6
+ %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
+ %m7 = extractelement <16 x i8> %mask, i32 7
+ %s7 = extractelement <16 x i8> %src, i8 %m7
+ %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
+ %m8 = extractelement <16 x i8> %mask, i32 8
+ %s8 = extractelement <16 x i8> %src, i8 %m8
+ %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
+ %m9 = extractelement <16 x i8> %mask, i32 9
+ %s9 = extractelement <16 x i8> %src, i8 %m9
+ %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
+ %m10 = extractelement <16 x i8> %mask, i32 10
+ %s10 = extractelement <16 x i8> %src, i8 %m10
+ %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
+ %m11 = extractelement <16 x i8> %mask, i32 11
+ %s11 = extractelement <16 x i8> %src, i8 %m11
+ %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
+ %m12 = extractelement <16 x i8> %mask, i32 12
+ %s12 = extractelement <16 x i8> %src, i8 %m12
+ %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
+ %m13 = extractelement <16 x i8> %mask, i32 13
+ %s13 = extractelement <16 x i8> %src, i8 %m13
+ %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
+ %m14 = extractelement <16 x i8> %mask, i32 14
+ %s14 = extractelement <16 x i8> %src, i8 %m14
+ %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
+ %m15 = extractelement <16 x i8> %mask, i32 15
+ %s15 = extractelement <16 x i8> %src, i8 %m15
+ %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
+ ret <16 x i8> %v15
+}
+
+; Negative test: the same one-lane "%src indexed by %mask[0]" shape, but on
+; <8 x i16> instead of <16 x i8>. The expected output must not contain any
+; swizzle instruction before the return.
+; CHECK-LABEL: swizzle_one_i16x8:
+; CHECK-NEXT: .functype swizzle_one_i16x8 (v128, v128) -> (v128)
+; CHECK-NOT: swizzle
+; CHECK: return
+define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
+ %m0 = extractelement <8 x i16> %mask, i32 0
+ %s0 = extractelement <8 x i16> %src, i16 %m0
+ %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
+ ret <8 x i16> %v0
+}
+
+; Mixed vector: two swizzle-pattern lanes (0, 7), two lanes holding %splatted
+; (3, 12) and two lanes holding the constant 42 (4, 14); the rest are undef.
+; With all three kinds tied at two lanes, the expected output starts with a
+; v8x16.swizzle and then patches the other four lanes with replace_lane.
+; CHECK-LABEL: mashup_swizzle_i8x16:
+; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
+; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
+; CHECK: i8x16.replace_lane
+; CHECK: i8x16.replace_lane
+; CHECK: i8x16.replace_lane
+; CHECK: i8x16.replace_lane
+; CHECK: return
+define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
+ ; lane 0: swizzle pattern, %src indexed by %mask[0]
+ %m0 = extractelement <16 x i8> %mask, i32 0
+ %s0 = extractelement <16 x i8> %src, i8 %m0
+ %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
+ ; lane 7: swizzle pattern, %src indexed by %mask[7]
+ %m1 = extractelement <16 x i8> %mask, i32 7
+ %s1 = extractelement <16 x i8> %src, i8 %m1
+ %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
+ ; lane 3: %splatted
+ %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
+ ; lane 12: %splatted
+ %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
+ ; lane 4: constant 42
+ %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
+ ; lane 14: constant 42
+ %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
+ ret <16 x i8> %v5
+}
+
+; Mixed vector: one swizzle-pattern lane (0), two lanes holding %splatted
+; (3, 12) and two lanes holding the constant 42 (4, 14); the rest are undef.
+; The expected output starts with a v128.const carrying 42 in lanes 4 and 14
+; (zeros elsewhere) and then patches the other three lanes with replace_lane.
+; CHECK-LABEL: mashup_const_i8x16:
+; CHECK-NEXT: .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
+; CHECK: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
+; CHECK: i8x16.replace_lane
+; CHECK: i8x16.replace_lane
+; CHECK: i8x16.replace_lane
+; CHECK: return
+define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
+ ; lane 0: swizzle pattern, %src indexed by %mask[0]
+ %m0 = extractelement <16 x i8> %mask, i32 0
+ %s0 = extractelement <16 x i8> %src, i8 %m0
+ %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
+ ; lane 3: %splatted
+ %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
+ ; lane 12: %splatted
+ %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
+ ; lane 4: constant 42
+ %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
+ ; lane 14: constant 42
+ %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
+ ret <16 x i8> %v4
+}
+
+; Mixed vector: one swizzle-pattern lane (0), two lanes holding %splatted
+; (3, 12) and one lane holding the constant 42 (4); the rest are undef.
+; The expected output starts with an i8x16.splat of the third argument and
+; then patches the other two lanes with replace_lane.
+; CHECK-LABEL: mashup_splat_i8x16:
+; CHECK-NEXT: .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
+; CHECK: i8x16.splat $push[[L0:[0-9]+]]=, $2
+; CHECK: i8x16.replace_lane
+; CHECK: i8x16.replace_lane
+; CHECK: return
+define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
+ ; lane 0: swizzle pattern, %src indexed by %mask[0]
+ %m0 = extractelement <16 x i8> %mask, i32 0
+ %s0 = extractelement <16 x i8> %src, i8 %m0
+ %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
+ ; lane 3: %splatted
+ %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
+ ; lane 12: %splatted
+ %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
+ ; lane 4: constant 42
+ %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
+ ret <16 x i8> %v3
+}
+
; CHECK-LABEL: undef_const_insert_f32x4:
; CHECK-NEXT: .functype undef_const_insert_f32x4 () -> (v128)
-; CHECK-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.5p5
-; CHECK-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
-; CHECK-NEXT: return $pop[[L1]]
+; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
+; CHECK-NEXT: return $pop[[L0]]
define <4 x float> @undef_const_insert_f32x4() {
%v = insertelement <4 x float> undef, float 42., i32 1
ret <4 x float> %v