The Intel models need one extra uop for memory-folded instructions. In addition, many instructions take a non-default memory latency, so making the load latency a parameter should allow us to use the multiclass far more widely to tidy things up.
Differential Revision: https://reviews.llvm.org/D44840
llvm-svn: 328446
// folded loads.
//
// Template arguments:
//   SchedRW  - foldable SchedWrite; a WriteRes is defined for both SchedRW
//              and SchedRW.Folded.
//   ExePorts - processor resources used by the register variant; the folded
//              variant prepends BWPort23 to this list.
//   Lat      - latency of the register variant.
//   Res      - resource cycles matching ExePorts (default [1]); the folded
//              variant prepends one cycle for BWPort23.
//   UOps     - micro-op count of the register variant (default 1).
//   LoadLat  - cycles added to Lat for the folded variant (default 5);
//              introduced by this change, replacing the hard-coded 5.
multiclass BWWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [1], int UOps = 1> {
+ int Lat, list<int> Res = [1], int UOps = 1,
+ int LoadLat = 5> {
// Register variant is using a single cycle on ExePort.
// NOTE(review): Res is not applied to this def; only the folded variant below
// sets ResourceCycles - confirm this is intended when Res != [1].
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let NumMicroOps = UOps;
}
- // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the
- // latency.
+ // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
+ // the latency (default = 5).
// With this change the folded variant also reports one extra micro-op
// (UOps + 1) instead of reusing the register variant's UOps.
def : WriteRes<SchedRW.Folded, !listconcat([BWPort23], ExePorts)> {
- let Latency = !add(Lat, 5);
+ let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, 1);
}
}
// folded loads.
//
// Template arguments:
//   SchedRW  - foldable SchedWrite; a WriteRes is defined for both SchedRW
//              and SchedRW.Folded.
//   ExePorts - processor resources used by the register variant; the folded
//              variant prepends HWPort23 to this list.
//   Lat      - latency of the register variant.
//   Res      - resource cycles matching ExePorts (default [1]); the folded
//              variant prepends one cycle for HWPort23.
//   UOps     - micro-op count of the register variant (default 1).
//   LoadLat  - cycles added to Lat for the folded variant (default 5);
//              introduced by this change, replacing the hard-coded 5.
multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [1], int UOps = 1> {
+ int Lat, list<int> Res = [1], int UOps = 1,
+ int LoadLat = 5> {
// Register variant is using a single cycle on ExePort.
// NOTE(review): Res is not applied to this def; only the folded variant below
// sets ResourceCycles - confirm this is intended when Res != [1].
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let NumMicroOps = UOps;
}
- // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the
- // latency.
+ // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
+ // the latency (default = 5).
// With this change the folded variant also reports one extra micro-op
// (UOps + 1) instead of reusing the register variant's UOps.
def : WriteRes<SchedRW.Folded, !listconcat([HWPort23], ExePorts)> {
- let Latency = !add(Lat, 5);
+ let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, 1);
}
}
// folded loads.
//
// Template arguments:
//   SchedRW  - foldable SchedWrite; a WriteRes is defined for both SchedRW
//              and SchedRW.Folded.
//   ExePorts - processor resources used by the register variant; the folded
//              variant prepends SBPort23 to this list.
//   Lat      - latency of the register variant.
//   Res      - resource cycles matching ExePorts (default [1]); the folded
//              variant prepends one cycle for SBPort23.
//   UOps     - micro-op count of the register variant (default 1).
//   LoadLat  - cycles added to Lat for the folded variant (default 4);
//              introduced by this change, replacing the hard-coded 4.
multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [1], int UOps = 1> {
+ int Lat, list<int> Res = [1], int UOps = 1,
+ int LoadLat = 4> {
// Register variant is using a single cycle on ExePort.
// NOTE(review): Res is not applied to this def; only the folded variant below
// sets ResourceCycles - confirm this is intended when Res != [1].
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let NumMicroOps = UOps;
}
- // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
- // latency.
+ // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
+ // the latency (default = 4).
// With this change the folded variant also reports one extra micro-op
// (UOps + 1) instead of reusing the register variant's UOps.
def : WriteRes<SchedRW.Folded, !listconcat([SBPort23], ExePorts)> {
- let Latency = !add(Lat, 4);
+ let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, 1);
}
}
// folded loads.
//
// Template arguments:
//   SchedRW  - foldable SchedWrite; a WriteRes is defined for both SchedRW
//              and SchedRW.Folded.
//   ExePorts - processor resources used by the register variant; the folded
//              variant prepends SKLPort23 to this list.
//   Lat      - latency of the register variant.
//   Res      - resource cycles matching ExePorts (default [1]); the folded
//              variant prepends one cycle for SKLPort23.
//   UOps     - micro-op count of the register variant (default 1).
//   LoadLat  - cycles added to Lat for the folded variant (default 5);
//              introduced by this change, replacing the hard-coded 5.
multiclass SKLWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [1], int UOps = 1> {
+ int Lat, list<int> Res = [1], int UOps = 1,
+ int LoadLat = 5> {
// Register variant is using a single cycle on ExePort.
// NOTE(review): Res is not applied to this def; only the folded variant below
// sets ResourceCycles - confirm this is intended when Res != [1].
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let NumMicroOps = UOps;
}
- // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the
- // latency.
+ // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
+ // the latency (default = 5).
// With this change the folded variant also reports one extra micro-op
// (UOps + 1) instead of reusing the register variant's UOps.
def : WriteRes<SchedRW.Folded, !listconcat([SKLPort23], ExePorts)> {
- let Latency = !add(Lat, 5);
+ let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, 1);
}
}
// folded loads.
//
// Template arguments:
//   SchedRW  - foldable SchedWrite; a WriteRes is defined for both SchedRW
//              and SchedRW.Folded.
//   ExePorts - processor resources used by the register variant; the folded
//              variant prepends SKXPort23 to this list.
//   Lat      - latency of the register variant.
//   Res      - resource cycles matching ExePorts (default [1]); the folded
//              variant prepends one cycle for SKXPort23.
//   UOps     - micro-op count of the register variant (default 1).
//   LoadLat  - cycles added to Lat for the folded variant (default 5);
//              introduced by this change, replacing the hard-coded 5.
multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [1], int UOps = 1> {
+ int Lat, list<int> Res = [1], int UOps = 1,
+ int LoadLat = 5> {
// Register variant is using a single cycle on ExePort.
// NOTE(review): Res is not applied to this def; only the folded variant below
// sets ResourceCycles - confirm this is intended when Res != [1].
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let NumMicroOps = UOps;
}
- // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the
- // latency.
+ // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
+ // the latency (default = 5).
// With this change the folded variant also reports one extra micro-op
// (UOps + 1) instead of reusing the register variant's UOps.
def : WriteRes<SchedRW.Folded, !listconcat([SKXPort23], ExePorts)> {
- let Latency = !add(Lat, 5);
+ let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, 1);
}
}
// folded loads.
multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
- int Lat, list<int> Res = [1], int UOps = 1> {
+ int Lat, list<int> Res = [1], int UOps = 1,
+ int LoadLat = 3> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let NumMicroOps = UOps;
}
- // Memory variant also uses a cycle on MEC_RSV and adds 3 cycles to the
- // latency.
+ // Memory variant also uses a cycle on MEC_RSV and adds LoadLat cycles to
+ // the latency (default = 3).
def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
- let Latency = !add(Lat, 3);
+ let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
let NumMicroOps = UOps;
}