// are exactly the same as in s1, which means that they are exactly the
// same as in P. This implies that P == Q.
+ // There can be a situation where there are more entries with the same
+ // bits set than there are set bits (e.g. value 9 occurring more than 2
+ // times). In such cases it will be impossible to complete this to a
+ // perfect shuffle.
+ SmallVector<uint32_t, 8> Sorted(Worklist);
+ llvm::sort(Sorted.begin(), Sorted.end());
+
+ for (unsigned I = 0, E = Sorted.size(); I != E;) {
+ unsigned P = Sorted[I], Count = 1;
+ while (++I != E && P == Sorted[I])
+ ++Count;
+ if (countPopulation(P) < Count) {
+ // Reset all occurrences of P, if there are more occurrences of P
+ // than there are bits in P.
+ for_each(Worklist, [P](unsigned &Q) {
+ if (Q == P)
+ Q = 0;
+ });
+ }
+ }
+
return Worklist;
}
Comps[I] = T;
}
+#ifndef NDEBUG
+  // Check that we have generated a valid completion: every entry must have
+  // exactly one bit set, and the union of all entries must be exactly the
+  // low Width bits.
+  uint32_t OrAll = 0;
+  for (uint32_t C : Comps) {
+    assert(isPowerOf2_32(C) && "Completion entry must have exactly one bit set");
+    OrAll |= C;
+  }
+  assert(OrAll == (1u << Width) - 1 &&
+         "Completion entries must cover all Width bits");
+#endif
+
return Comps;
}
if (llvm::any_of(Comps, [](uint32_t P) { return P == 0; }))
return OpRef::fail();
+dbgs() << "Comps:["; for (unsigned I : Comps) dbgs() << ' ' << I; dbgs() << " ]\n";
auto Pick = completeToPerfect(Comps, LogLen);
+dbgs() << "Pick:["; for (unsigned I : Pick) dbgs() << ' ' << I; dbgs() << " ]\n";
for (unsigned I = 0; I != LogLen; ++I)
Perm[I] = Log2_32(Pick[I]);
--- /dev/null
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that this doesn't end up being an entirely perfect shuffle.
+; CHECK: vshuff
+; CHECK-NOT: vdeal
+; The mask selects elements 0-7 of %a0 for lanes 0-7, elements 8-15 of %a1
+; (mask indices 40-47, i.e. 32 + 8..15) for lanes 8-15, and leaves the
+; remaining 16 lanes undefined.
+define <32 x i32> @f0(<32 x i32> %a0, <32 x i32> %a1) #0 {
+ %v0 = shufflevector <32 x i32> %a0, <32 x i32> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <32 x i32> %v0
+}
+
+attributes #0 = { nounwind memory(none) "target-features"="+hvxv62,+hvx-length128b" }