"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
"true", "Use alternative pattern for sextload convert to f32">;
-def FeatureMacroOpFusion : SubtargetFeature<
- "macroop-fusion", "HasMacroOpFusion", "true",
- "CPU supports macro op fusion">;
+def FeatureArithmeticBccFusion : SubtargetFeature<
+ "arith-bcc-fusion", "HasArithmeticBccFusion", "true",
+ "CPU fuses arithmetic+bcc operations">;
+
+def FeatureArithmeticCbzFusion : SubtargetFeature<
+ "arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
+ "CPU fuses arithmetic + cbz/cbnz operations">;
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
FeatureFPARMv8,
- FeatureMacroOpFusion,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
FeatureNEON,
FeaturePerfMon,
FeatureSlowMisaligned128Store,
FeatureCRC,
FeatureCrypto,
FeatureFPARMv8,
- FeatureMacroOpFusion,
+ FeatureArithmeticBccFusion,
FeatureNEON,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
MachineInstr &Second) const {
- if (Subtarget.hasMacroOpFusion()) {
+ if (Subtarget.hasArithmeticBccFusion()) {
// Fuse CMN, CMP, TST followed by Bcc.
unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::Bcc) {
switch (First.getOpcode()) {
default:
return false;
- case AArch64::SUBSWri:
case AArch64::ADDSWri:
- case AArch64::ANDSWri:
- case AArch64::SUBSXri:
+ case AArch64::ADDSWrr:
case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !hasShiftedReg(First);
}
}
+ }
+ if (Subtarget.hasArithmeticCbzFusion()) {
// Fuse ALU operations followed by CBZ/CBNZ.
+ unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
switch (First.getOpcode()) {
default:
return false;
case AArch64::ADDWri:
+ case AArch64::ADDWrr:
case AArch64::ADDXri:
+ case AArch64::ADDXrr:
case AArch64::ANDWri:
+ case AArch64::ANDWrr:
case AArch64::ANDXri:
+ case AArch64::ANDXrr:
case AArch64::EORWri:
+ case AArch64::EORWrr:
case AArch64::EORXri:
+ case AArch64::EORXrr:
case AArch64::ORRWri:
+ case AArch64::ORRWrr:
case AArch64::ORRXri:
+ case AArch64::ORRXrr:
case AArch64::SUBWri:
+ case AArch64::SUBWrr:
case AArch64::SUBXri:
+ case AArch64::SUBXrr:
return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !hasShiftedReg(First);
}
}
}
bool Misaligned128StoreIsSlow = false;
bool AvoidQuadLdStPairs = false;
bool UseAlternateSExtLoadCVTF32Pattern = false;
- bool HasMacroOpFusion = false;
+ bool HasArithmeticBccFusion = false;
+ bool HasArithmeticCbzFusion = false;
bool DisableLatencySchedHeuristic = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
bool useAlternateSExtLoadCVTF32Pattern() const {
return UseAlternateSExtLoadCVTF32Pattern;
}
- bool hasMacroOpFusion() const { return HasMacroOpFusion; }
+ bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
+ bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
-; RUN: llc -o - %s -mattr=+macroop-fusion,+use-postra-scheduler | FileCheck %s
+; RUN: llc -o - %s -mattr=+arith-cbz-fusion,+use-postra-scheduler | FileCheck %s
; RUN: llc -o - %s -mcpu=cyclone | FileCheck %s
target triple = "arm64-apple-ios"