// Find most common element to initialize vector with. This is to avoid
// unnecessary vinsert/valign for cases where the same value is present
// many times. Creates a histogram of the vector's elements to find the
- // most common element n.
+ // most common element.
assert(4*Words.size() == Subtarget.getVectorLength());
- int VecHist[32];
- int n = 0;
+ SmallVector<int,32> VecHist(32);
+ int MaxAt = 0;
for (unsigned i = 0; i != NumWords; ++i) {
VecHist[i] = 0;
if (Words[i].isUndef())
if (Words[i] == Words[j])
VecHist[i]++;
- if (VecHist[i] > VecHist[n])
- n = i;
+ if (VecHist[i] > VecHist[MaxAt])
+ MaxAt = i;
}
- SDValue HalfV = getZero(dl, VecTy, DAG);
- if (VecHist[n] > 1) {
- SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
- HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
- {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
- }
- SDValue HalfV0 = HalfV;
- SDValue HalfV1 = HalfV;
-
- // Construct two halves in parallel, then or them together. Rn and Rm count
- // number of rotations needed before the next element. One last rotation is
- // performed post-loop to position the last element.
- int Rn = 0, Rm = 0;
- SDValue Sn, Sm;
- SDValue N = HalfV0;
- SDValue M = HalfV1;
- for (unsigned i = 0; i != NumWords/2; ++i) {
-
+ // If each value is different, don't do splat, just insert them one by one.
+ bool NoSplat = VecHist[MaxAt] <= 1;
+ SDValue RotV = NoSplat
+ ? DAG.getUNDEF(VecTy)
+ : DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[MaxAt]);
+ int Rn = 0;
+ for (unsigned i = 0; i != NumWords; ++i) {
// Rotate by element count since last insertion.
- if (Words[i] != Words[n] || VecHist[n] <= 1) {
- Sn = DAG.getConstant(Rn, dl, MVT::i32);
- HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
- N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
- {HalfV0, Words[i]});
+ if (NoSplat || Words[i] != Words[MaxAt]) {
+ RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy,
+ {RotV, DAG.getConstant(Rn, dl, MVT::i32)});
+ RotV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, Words[i]});
Rn = 0;
}
- if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
- Sm = DAG.getConstant(Rm, dl, MVT::i32);
- HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
- M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
- {HalfV1, Words[i+NumWords/2]});
- Rm = 0;
- }
Rn += 4;
- Rm += 4;
}
// Perform last rotation.
- Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
- Sm = DAG.getConstant(Rm, dl, MVT::i32);
- HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
- HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
-
- SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
- SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
-
- SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
-
- SDValue OutV =
- DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
- return OutV;
+ return DAG.getNode(HexagonISD::VROR, dl, VecTy,
+ {RotV, DAG.getConstant(Rn, dl, MVT::i32)});
}
SDValue
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: {
-; CHECK-NEXT: r3:2 = combine(#20,#9)
-; CHECK-NEXT: v0 = vxor(v0,v0)
-; CHECK-NEXT: r1 = #24
-; CHECK-NEXT: r4 = #12
+; CHECK-NEXT: r3:2 = combine(#76,#7)
+; CHECK-NEXT: r1 = #12
+; CHECK-NEXT: r4 = #9
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1 = vror(v0,r1)
+; CHECK-NEXT: v0 = vror(v0,r1)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1.w = vinsert(r2)
-; CHECK-NEXT: r4 = #7
-; CHECK-NEXT: r2 = #116
-; CHECK-NEXT: v0 = vror(v0,r4)
+; CHECK-NEXT: v0.w = vinsert(r2)
+; CHECK-NEXT: r2 = #20
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v0.w = vinsert(r4)
+; CHECK-NEXT: v0 = vror(v0,r3)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1 = vror(v1,r3)
+; CHECK-NEXT: v0.w = vinsert(r4)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1.w = vinsert(r0)
; CHECK-NEXT: v0 = vror(v0,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1 = vror(v1,r3)
+; CHECK-NEXT: v0.w = vinsert(r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v0 = vor(v0,v1)
+; CHECK-NEXT: v0 = vror(v0,r2)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%v0 = insertelement <32 x i32> undef, i32 undef, i32 0