Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
- ProcResUnitMask(0), ReservedResourceGroups(0),
- AvailableBuffers(~0ULL), ReservedBuffers(0) {
+ ProcResUnitMask(0), ReservedResourceGroups(0), AvailableBuffers(~0ULL),
+ ReservedBuffers(0) {
computeProcResourceMasks(SM, ProcResID2Mask);
// initialize vector ResIndex2ProcResID.
BusyResourceMask |= E.first;
}
+ // Also check the resource units that this instruction uses implicitly.
+ // Walk the set bits of the mask one unit at a time, lowest bit first.
+ uint64_t ImplicitUses = Desc.ImplicitlyUsedProcResUnits;
+ while (ImplicitUses) {
+ // Isolate the lowest set bit, then clear it from the pending set.
+ uint64_t Use = ImplicitUses & -ImplicitUses;
+ ImplicitUses ^= Use;
+ unsigned Index = getResourceStateIndex(Use);
+ // Accumulate the unit's mask bit, not `Index`: `Index` is only the
+ // subscript into `Resources`, while BusyResourceMask is a bit mask that
+ // is filtered through ProcResUnitMask right after this loop.
+ if (!Resources[Index]->isReady(/* NumUnits */ 1))
+ BusyResourceMask |= Use;
+ }
+
BusyResourceMask &= ProcResUnitMask;
if (BusyResourceMask)
return BusyResourceMask;
// Populate resources consumed.
using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
- std::vector<ResourcePlusCycles> Worklist;
+ SmallVector<ResourcePlusCycles, 4> Worklist;
// Track cycles contributed by resources that are in a "Super" relationship.
// This is required if we want to correctly match the behavior of method
uint64_t UsedResourceUnits = 0;
uint64_t UsedResourceGroups = 0;
+ auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
+ return countPopulation(Elt.first) > 1;
+ });
+ unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
+ uint64_t ImpliedUsesOfResourceUnits = 0;
// Remove cycles contributed by smaller resources.
for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
// Remove the leading 1 from the resource group mask.
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
UsedResourceGroups |= (A.first ^ NormalizedMask);
+
+ uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
+ if ((NormalizedMask != AvailableMask) &&
+ countPopulation(AvailableMask) == 1) {
+ // At simulation time, this resource group use will decay into a simple
+ // use of the resource unit identified by `AvailableMask`.
+ ImpliedUsesOfResourceUnits |= AvailableMask;
+ UsedResourceUnits |= AvailableMask;
+ }
}
for (unsigned J = I + 1; J < E; ++J) {
}
}
+ // Look for implicit uses of processor resource units: units which are indirectly
+ // consumed by resource groups, and that must always be available on instruction issue.
+ // NOTE(review): unlike the first pass, a group with NormalizedMask == AvailableMask is not skipped below and gets all its units marked - confirm intended.
+ while (ImpliedUsesOfResourceUnits) {
+ ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
+ ImpliedUsesOfResourceUnits = 0;
+ for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
+ ResourcePlusCycles &A = Worklist[I];
+ if (!A.second.size())
+ continue;
+
+ uint64_t NormalizedMask = A.first;
+ assert(countPopulation(NormalizedMask) > 1);
+ // Remove the leading 1 from the resource group mask.
+ NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+ uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
+ if ((NormalizedMask != AvailableMask) &&
+ countPopulation(AvailableMask) != 1)
+ continue;
+
+ UsedResourceUnits |= AvailableMask;
+ ImpliedUsesOfResourceUnits |= AvailableMask;
+ }
+ }
+
// A SchedWrite may specify a number of cycles in which a resource group
// is reserved. For example (on target x86; cpu Haswell):
//
BufferIDs ^= Current;
}
dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
+ dbgs() << "\t\tImplicitly Used Units="
+ << format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
<< '\n';
});
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -all-views=false -summary-view -iterations=1 < %s | FileCheck %s
+
+# Do not crash when running this simulation.
+# It is not safe to issue FXRSTOR if SKLPort1 is not available.
+
+bswap %eax
+bswap %eax
+fxrstor 64(%rsp)
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 68
+# CHECK-NEXT: Total uOps: 92
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 1.35
+# CHECK-NEXT: IPC: 0.04
+# CHECK-NEXT: Block RThroughput: 16.5