SubtargetFeature<"fuse-logical", "HasLogicalFusion", "true",
"Target supports Logical Operations fusion",
[FeatureFusion]>;
+// Fuses a 1-bit rldicl/rldicr rotate with the xor that consumes it
+// (the RotateLeftXor / RotateRightXor SHA3-assist pairs).
+def FeatureSha3Fusion :
+ SubtargetFeature<"fuse-sha3", "HasSha3Fusion", "true",
+ "Target supports SHA3 assist fusion",
+ [FeatureFusion]>;
+// Fuses a load with a compare-immediate against 0, 1 or -1
+// (the LoadCmp1/LoadCmp2/LoadCmp3 pairs).
+def FeatureCompareFusion:
+ SubtargetFeature<"fuse-cmp", "HasCompareFusion", "true",
+ "Target supports Comparison Operations fusion",
+ [FeatureFusion]>;
+// Fuses immediate-forming pairs (ori/oris, xori/xoris, lis/ori,
+// addis/addi) used to build wide immediates in a register.
+def FeatureWideImmFusion:
+ SubtargetFeature<"fuse-wideimm", "HasWideImmFusion", "true",
+ "Target supports Wide-Immediate fusion",
+ [FeatureFusion]>;
+// Fuses mtctr/mtlr (i.e. mtspr 9/8) with the branch that reads the SPR.
+def FeatureZeroMoveFusion:
+ SubtargetFeature<"fuse-zeromove", "HasZeroMoveFusion", "true",
+ "Target supports move to SPR with branch fusion",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
// still exist with the exception of those we know are Power9 specific.
list<SubtargetFeature> FusionFeatures = [
FeatureStoreFusion, FeatureAddLogicalFusion, FeatureLogicalAddFusion,
- FeatureLogicalFusion, FeatureArithAddFusion
+ FeatureLogicalFusion, FeatureArithAddFusion, FeatureSha3Fusion,
];
list<SubtargetFeature> P10AdditionalFeatures =
!listconcat(FusionFeatures, [
case FusionFeature::FK_SldiAdd:
return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) ||
(matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57));
+
+ // rldicl rx, ra, 1, 0 - xor
+ case FusionFeature::FK_RotateLeftXor:
+ return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 0);
+
+ // rldicr rx, ra, 1, 63 - xor
+ case FusionFeature::FK_RotateRightXor:
+ return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 63);
+
+ // Note: the 'L' field of cmpi/cmpli is folded into the opcode (CMPW* vs.
+ // CMPD* forms); it is not a separate operand of the MachineInstr.
+
+ // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
+ // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
+ case FusionFeature::FK_LoadCmp1:
+ // { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
+ // { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
+ case FusionFeature::FK_LoadCmp2: {
+ const MachineOperand &BT = SecondMI.getOperand(0);
+ if (!BT.isReg() ||
+ (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0))
+ return false;
+ if (SecondMI.getOpcode() == PPC::CMPDI &&
+ matchingImmOps(SecondMI, 2, -1, 16))
+ return true;
+ return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1);
+ }
+
+ // { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
+ case FusionFeature::FK_LoadCmp3: {
+ const MachineOperand &BT = SecondMI.getOperand(0);
+ if (!BT.isReg() ||
+ (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0))
+ return false;
+ return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1) ||
+ matchingImmOps(SecondMI, 2, -1, 16);
+ }
+
+ // mtctr - { bcctr,bcctrl }
+ case FusionFeature::FK_ZeroMoveCTR:
+ // ( mtctr rx ) is alias of ( mtspr 9, rx )
+ return (FirstMI.getOpcode() != PPC::MTSPR &&
+ FirstMI.getOpcode() != PPC::MTSPR8) ||
+ matchingImmOps(FirstMI, 0, 9);
+
+ // mtlr - { bclr,bclrl }
+ case FusionFeature::FK_ZeroMoveLR:
+ // ( mtlr rx ) is alias of ( mtspr 8, rx )
+ return (FirstMI.getOpcode() != PPC::MTSPR &&
+ FirstMI.getOpcode() != PPC::MTSPR8) ||
+ matchingImmOps(FirstMI, 0, 8);
+
+ // addis rx,ra,si - addi rt,rx,SI, SI >= 0
+ case FusionFeature::FK_AddisAddi: {
+ const MachineOperand &RA = FirstMI.getOperand(1);
+ const MachineOperand &SI = SecondMI.getOperand(2);
+ if (!SI.isImm() || !RA.isReg())
+ return false;
+ if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
+ return false;
+ return SignExtend64(SI.getImm(), 16) >= 0;
+ }
+
+ // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
+ case FusionFeature::FK_AddiAddis: {
+ const MachineOperand &RA = FirstMI.getOperand(1);
+ const MachineOperand &SI = FirstMI.getOperand(2);
+ if (!SI.isImm() || !RA.isReg())
+ return false;
+ if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
+ return false;
+ int64_t ExtendedSI = SignExtend64(SI.getImm(), 16);
+ return ExtendedSI >= 2;
+ }
}
llvm_unreachable("All the cases should have been handled");
FUSION_FEATURE(SldiAdd, hasArithAddFusion, -1, FUSION_OP_SET(RLDICR, RLDICR_32),
FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8))
+// rldicl rx, ra, 1, 0 - xor
+// i.e. a rotate-left-by-1 feeding xor; the SH=1/MB=0 immediates are
+// verified in FK_RotateLeftXor.
+FUSION_FEATURE(RotateLeftXor, hasSha3Fusion, 1,
+ FUSION_OP_SET(RLDICL, RLDICL_32, RLDICL_32_64),
+ FUSION_OP_SET(XOR, XOR8))
+
+// rldicr rx, ra, 1, 63 - xor
+// Same rotate-by-1 pattern via rldicr; SH=1/ME=63 is verified in
+// FK_RotateRightXor.
+FUSION_FEATURE(RotateRightXor, hasSha3Fusion, 1,
+ FUSION_OP_SET(RLDICR, RLDICR_32), FUSION_OP_SET(XOR, XOR8))
+
+// There are two special cases in the 'load-compare' series, so we have to
+// split them into several pattern groups to fit the current framework. This
+// can be made clearer once we switch to a more expressive approach.
+
+// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
+// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
+// The compare's target must be CR0 (or a virtual register) and its
+// immediate in { 0, 1 }, or -1 for CMPDI only; see FK_LoadCmp1.
+FUSION_FEATURE(LoadCmp1, hasCompareFusion, 1,
+ FUSION_OP_SET(LBZ, LBZ8, LBZX, LBZX8, LBZXTLS, LBZXTLS_,
+ LBZXTLS_32, LHZ, LHZ8, LHZX, LHZX8, LHZXTLS,
+ LHZXTLS_, LHZXTLS_32, LWZ, LWZ8, LWZX, LWZX8,
+ LWZXTLS, LWZXTLS_, LWZXTLS_32),
+ FUSION_OP_SET(CMPDI, CMPLDI, CMPLWI))
+
+// { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
+// { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
+// Same CR0/immediate constraints, enforced in FK_LoadCmp2.
+FUSION_FEATURE(LoadCmp2, hasCompareFusion, 1,
+ FUSION_OP_SET(LD, LDX, LDXTLS, LDXTLS_),
+ FUSION_OP_SET(CMPDI, CMPLDI))
+
+// { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
+// These are algebraic (sign-extending) loads, so they pair with the signed
+// compare-immediates (cmpi -> CMPWI/CMPDI). The logical cmpli forms cannot
+// encode the -1 immediate that FK_LoadCmp3 accepts, so they do not belong
+// in this op set.
+FUSION_FEATURE(LoadCmp3, hasCompareFusion, 1,
+ FUSION_OP_SET(LHA, LHA8, LHAX, LHAX8, LWA, LWA_32, LWAX,
+ LWAX_32),
+ FUSION_OP_SET(CMPDI, CMPWI))
+
+// The following 'wide immediate' pairs fuse the two halves of materializing
+// a 32-bit immediate (low/high 16-bit pieces) in a register.
+
+// ori - oris
+FUSION_FEATURE(OriOris, hasWideImmFusion, 1, FUSION_OP_SET(ORI, ORI8),
+ FUSION_OP_SET(ORIS, ORIS8))
+
+// lis - ori
+FUSION_FEATURE(LisOri, hasWideImmFusion, 1, FUSION_OP_SET(LIS, LIS8),
+ FUSION_OP_SET(ORI, ORI8))
+
+// oris - ori
+FUSION_FEATURE(OrisOri, hasWideImmFusion, 1, FUSION_OP_SET(ORIS, ORIS8),
+ FUSION_OP_SET(ORI, ORI8))
+
+// xori - xoris
+FUSION_FEATURE(XoriXoris, hasWideImmFusion, 1, FUSION_OP_SET(XORI, XORI8),
+ FUSION_OP_SET(XORIS, XORIS8))
+
+// xoris - xori
+FUSION_FEATURE(XorisXori, hasWideImmFusion, 1, FUSION_OP_SET(XORIS, XORIS8),
+ FUSION_OP_SET(XORI, XORI8))
+
+// addis rx,ra,si - addi rt,rx,SI, SI >= 0
+// Extra operand constraints (ra must not be the zero register, and the
+// sign-extended SI must be >= 0) are checked in FK_AddisAddi.
+FUSION_FEATURE(AddisAddi, hasWideImmFusion, 1,
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8),
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL))
+
+// addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
+// Constraints (non-zero ra, sign-extended SI >= 2) are checked in
+// FK_AddiAddis.
+FUSION_FEATURE(AddiAddis, hasWideImmFusion, 1,
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL),
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8))
+
+// mtctr - { bcctr,bcctrl }
+// MTSPR/MTSPR8 appear here because mtctr is an alias of (mtspr 9, rx);
+// FK_ZeroMoveCTR rejects mtspr with any other SPR number.
+// NOTE(review): DepOpIdx of -1 presumably means no register-dependence
+// check between the pair — confirm against the FUSION_FEATURE definition.
+FUSION_FEATURE(ZeroMoveCTR, hasZeroMoveFusion, -1,
+ FUSION_OP_SET(MTCTR, MTCTRloop, MTSPR8, MTSPR),
+ FUSION_OP_SET(BCCTR, BCCTRn, BCCTR8, BCCTR8n, BCCTRL, BCCTRLn,
+ BCCTRL8, BCCTRL8n, gBCCTR, gBCCTRL))
+
+// mtlr - { bclr,bclrl }
+// Likewise, mtlr is an alias of (mtspr 8, rx); filtered in FK_ZeroMoveLR.
+FUSION_FEATURE(ZeroMoveLR, hasZeroMoveFusion, -1,
+ FUSION_OP_SET(MTLR8, MTLR, MTSPR8, MTSPR),
+ FUSION_OP_SET(BCLR, BCLRn, gBCLR, BCLRL, BCLRLn, gBCLRL))
+
#undef FUSION_FEATURE
#undef FUSION_OP_SET
HasAddLogicalFusion = false;
HasLogicalAddFusion = false;
HasLogicalFusion = false;
+ HasSha3Fusion = false;
+ HasCompareFusion = false;
+ HasWideImmFusion = false;
+ HasZeroMoveFusion = false;
IsISA2_06 = false;
IsISA2_07 = false;
IsISA3_0 = false;
bool HasAddLogicalFusion;
bool HasLogicalAddFusion;
bool HasLogicalFusion;
+ bool HasSha3Fusion;
+ bool HasCompareFusion;
+ bool HasWideImmFusion;
+ bool HasZeroMoveFusion;
bool IsISA2_06;
bool IsISA2_07;
bool IsISA3_0;
bool hasAddLogicalFusion() const { return HasAddLogicalFusion; }
bool hasLogicalAddFusion() const { return HasLogicalAddFusion; }
bool hasLogicalFusion() const { return HasLogicalFusion; }
+ bool hasCompareFusion() const { return HasCompareFusion; }
+ bool hasWideImmFusion() const { return HasWideImmFusion; }
+ bool hasSha3Fusion() const { return HasSha3Fusion; }
+ bool hasZeroMoveFusion() const { return HasZeroMoveFusion; }
bool needsSwapsForVSXMemOps() const {
return hasVSX() && isLittleEndian() && !hasP9Vector();
}
# REQUIRES: asserts
# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 -x=mir < %s \
# RUN: -debug-only=machine-scheduler -start-before=postmisched 2>&1 \
+# RUN: -mattr=+fuse-zeromove,+fuse-cmp,+fuse-wideimm \
# RUN: | FileCheck %s
# CHECK: add_mulld:%bb.0
renamable $x3 = ADD8 killed renamable $x4, $x5
BLR8 implicit $lr8, implicit $rm, implicit $x3
...
+
+# SHA3-assist fusion: a 1-bit rotate (rldicl SH=1, MB=0) feeding the
+# dependent xor must be macro-fused by the post-RA scheduler.
+# CHECK: rldicl_xor:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / RLDICL - XOR8
+---
+name: rldicl_xor
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x3, $x4, $x5
+ renamable $x4 = RLDICL $x3, 1, 0
+ renamable $x3 = XOR8 killed renamable $x4, $x5
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+# Same pattern expressed via rldicr (SH=1, ME=63).
+# CHECK: rldicr_xor:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / RLDICR - XOR8
+---
+name: rldicr_xor
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x3, $x4, $x5
+ renamable $x4 = RLDICR $x3, 1, 63
+ renamable $x3 = XOR8 killed renamable $x4, $x5
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+# Wide-immediate fusion (+fuse-wideimm on the RUN line): ori followed by the
+# dependent oris must be macro-fused.
+# CHECK: ori_oris:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / ORI8 - ORIS8
+---
+name: ori_oris
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x3, $x4
+ renamable $x4 = ORI8 $x3, 63
+ renamable $x3 = ORIS8 killed renamable $x4, 20
+ BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+# CHECK: load_cmp:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / LD - CMPDI
+---
+name: load_cmp
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x3, $x4, $x5
+ renamable $x3 = LD 0, killed renamable $x3
+ renamable $cr0 = CMPDI killed renamable $x3, 0
+ renamable $x3 = ISEL8 killed renamable $x5, killed renamable $x4, renamable $cr0lt, implicit killed $cr0
+ BLR8 implicit $lr8, implicit $rm, implicit $x3