// Bit tests instructions: BT, BTS, BTR, BTC.
let Defs = [EFLAGS] in {
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTest] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>,
[]>, TB, NotMemoryFoldable;
}
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTest] in {
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
} // SchedRW
let hasSideEffects = 0 in {
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [BWPort06]>;
+def : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs
// Bit counts.
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
let ResourceCycles = [1];
}
def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>;
-def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
let Latency = 1;
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [HWPort06]>;
+def : WriteRes<WriteBitTest,[HWPort06]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>;
-def: InstRW<[HWWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
let Latency = 1;
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SBPort05]>;
+def : WriteRes<WriteBitTest,[SBPort05]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
let Latency = 1;
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SKLPort06]>;
+def : WriteRes<WriteBitTest,[SKLPort06]>; // Bit Test instrs
// Bit counts.
defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>;
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
-def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
let Latency = 1;
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SKXPort06]>;
+def : WriteRes<WriteBitTest,[SKXPort06]>; // Bit Test instrs
// Integer shifts and rotates.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
-def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
let Latency = 1;
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
+def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
// Integer shifts and rotates.
defm WriteShift : X86SchedWritePair;
let Latency = 2;
let ResourceCycles = [2];
}
+def : WriteRes<WriteBitTest,[AtomPort01]>;
defm : X86WriteResUnsupported<WriteIMulH>;
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
+def : WriteRes<WriteBitTest,[JALU01]>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
let ResourceCycles = [2,1];
}
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
+def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
def : WriteRes<WriteSETCC, [ZnALU]>;
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
+def : WriteRes<WriteBitTest,[ZnALU]>;
// Bit counts.
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "subtarget-emitter"
+// Switch consulted by CodeGenSchedModels::checkSchedClasses(): a compile-time
+// constant under EXPENSIVE_CHECKS, a hidden command-line option otherwise.
+#ifdef EXPENSIVE_CHECKS
+// FIXME: TableGen fails iff EXPENSIVE_CHECKS is defined
+static constexpr bool OptCheckSchedClasses = true;
+#else
+// FIXME: the default value should be false
+static cl::opt<bool> OptCheckSchedClasses(
+ "check-sched-class-table", cl::init(true), cl::Hidden,
+ cl::desc("Check sched class table on different types of inconsistencies"));
+#endif
+
#ifndef NDEBUG
static void dumpIdxVec(ArrayRef<unsigned> V) {
for (unsigned Idx : V)
collectOptionalProcessorInfo();
checkCompleteness();
+ checkSchedClasses();
}
void CodeGenSchedModels::collectRetireControlUnits() {
}
}
+// Emit TableGen warnings about suspicious InstRW overrides. Does nothing
+// unless OptCheckSchedClasses is set. For every instruction whose sched class
+// has writes and at least one InstRW override, two checks are made:
+//  1. The class is overridden by more than half of the processor models,
+//     which makes it a candidate for a more fine-grained sched class.
+//  2. An override carries the same Latency/NumMicroOps as the write it
+//     replaces, i.e. the redefinition looks unnecessary.
+void CodeGenSchedModels::checkSchedClasses() {
+  if (!OptCheckSchedClasses)
+    return;
+
+  std::string str;
+  raw_string_ostream OS(str);
+
+  // True if Def declares a "Latency" field that has a value. Needed because
+  // Record::getValueAsInt() is a fatal error on records lacking the field.
+  auto HasLatency = [](const Record *Def) {
+    const RecordVal *RV = Def->getValue("Latency");
+    return RV && RV->getValue();
+  };
+
+  // Check each instruction for each model to see if it's overridden too often.
+  // Iff YES it's a candidate for more fine-grained Sched Class.
+  for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
+    StringRef InstName = Inst->TheDef->getName();
+    unsigned SCIdx = getSchedClassIdx(*Inst);
+    if (!SCIdx)
+      continue;
+    const CodeGenSchedClass &SC = getSchedClass(SCIdx);
+    if (SC.Writes.empty())
+      continue;
+    const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs;
+    if (RWDefs.empty())
+      continue;
+    // FIXME: what should be threshold here?
+    if (RWDefs.size() > (ProcModels.size() / 2)) {
+      // FIXME: this dump hangs the execution !!!
+      // SC.dump(&Target.getSchedModels());
+      OS << "SchedRW machine model for inst '" << InstName << "' (";
+      for (auto I : SC.Writes)
+        OS << " " << SchedWrites[I].Name;
+      for (auto I : SC.Reads)
+        OS << " " << SchedReads[I].Name;
+      OS << " ) should be updated/improved because it's overridden "
+         << RWDefs.size() << " times out of " << ProcModels.size()
+         << " models:\n\t";
+      for (Record *RWDef : RWDefs)
+        OS << " " << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName;
+      // OS.str() flushes the stream into 'str' before the warning is printed.
+      PrintWarning(OS.str());
+      str.clear();
+    }
+
+    // TODO: here we should check latency/uop in SC vs. RWDef. Maybe we
+    // should do it iff RWDefs.size() == 1 only.
+    // Iff latency/uop are the same then warn about unnecessary redefine.
+    // RWDefs is known to be non-empty here (see the early 'continue' above).
+    for (Record *RWDef : RWDefs) {
+      IdxVec Writes;
+      IdxVec Reads;
+      findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
+
+      // Only overrides with the same shape as the class can be compared
+      // position by position.
+      if (Writes.size() != SC.Writes.size() || Reads.size() != SC.Reads.size())
+        continue;
+
+      // TODO: do we need sorting Write & Reads?
+      for (unsigned I = 0, S = SC.Writes.size(); I < S; I++) {
+        // Bind by reference: these records are only inspected, not modified.
+        const auto &SCSchedW = SchedWrites[SC.Writes[I]];
+        const auto &SchedW = SchedWrites[Writes[I]];
+        if (!SCSchedW.TheDef || !SchedW.TheDef)
+          continue;
+        // FIXME: We should deal with default Latency here
+        // Skip the pair unless both sides actually define a latency.
+        if (!HasLatency(SCSchedW.TheDef) || !HasLatency(SchedW.TheDef))
+          continue;
+        auto SCLat = SCSchedW.TheDef->getValueAsInt("Latency");
+        auto SCuOp = SCSchedW.TheDef->getValueAsInt("NumMicroOps");
+        auto Lat = SchedW.TheDef->getValueAsInt("Latency");
+        auto uOp = SchedW.TheDef->getValueAsInt("NumMicroOps");
+        if ((SCLat == Lat) && (SCuOp == uOp))
+          OS << "Overridden version of inst '" << InstName
+             << "' has the same latency & uOp values as the original one "
+                "for model '"
+             << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName
+             << "'\n";
+      }
+      // OS.str() flushes first; testing 'str' directly could miss text that
+      // is still sitting in the stream's buffer.
+      if (!OS.str().empty()) {
+        PrintWarning(OS.str());
+        str.clear();
+      }
+    }
+  }
+}
+
// Get the SchedClass index for an instruction.
unsigned
CodeGenSchedModels::getSchedClassIdx(const CodeGenInstruction &Inst) const {
void collectSchedClasses();
+ void checkSchedClasses();
+
void collectRetireControlUnits();
void collectRegisterFiles();