const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
this](int64_t ExpectedValue,
- unsigned SrcSize) -> bool {
+ unsigned SrcSize,
+ bool IsReversable) -> bool {
// s_cmp_eq_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
// s_cmp_eq_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
// s_cmp_ge_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
//
// If result of the AND is unused except in the compare:
// s_and_b(32|64) $src, 1 => s_bitcmp1_b(32|64) $src, 0
-
- if (CmpValue != ExpectedValue)
- return false;
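+ //
+ // Reversed conditions, folded only when the compare is the single
+ // non-debug use of the AND result: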
+ // s_cmp_eq_u32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
+ // s_cmp_eq_i32 (s_and_b32 $src, 1), 0 => s_bitcmp0_b32 $src, 0
+ // s_cmp_eq_u64 (s_and_b64 $src, 1), 0 => s_bitcmp0_b64 $src, 0
+ // s_cmp_lg_u32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
+ // s_cmp_lg_i32 (s_and_b32 $src, 1), 1 => s_bitcmp0_b32 $src, 0
+ // s_cmp_lg_u64 (s_and_b64 $src, 1), 1 => s_bitcmp0_b64 $src, 0
+
+ bool IsReversedCC = false;
+ if (CmpValue != ExpectedValue) {
+ if (!IsReversable)
+ return false;
+ IsReversedCC = CmpValue == (ExpectedValue ^ 1);
+ if (!IsReversedCC)
+ return false;
+ }
MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
if (!Def || Def->getParent() != CmpInstr.getParent())
else if (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1)
return false;
+ Register DefReg = Def->getOperand(0).getReg();
+ if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
+ return false;
+
for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
I != E; ++I) {
if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
SccDef->setIsDead(false);
CmpInstr.eraseFromParent();
- if (!MRI->use_nodbg_empty(Def->getOperand(0).getReg()))
+ if (!MRI->use_nodbg_empty(DefReg)) {
+ assert(!IsReversedCC);
return true;
+ }
// Replace AND with unused result with a S_BITCMP.
// TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
// process any power of 2.
MachineBasicBlock *MBB = Def->getParent();
- // TODO: Reverse conditions can use S_BITCMP0_*.
- unsigned NewOpc = (SrcSize == 32) ? AMDGPU::S_BITCMP1_B32
- : AMDGPU::S_BITCMP1_B64;
+ unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
+ : AMDGPU::S_BITCMP1_B32
+ : IsReversedCC ? AMDGPU::S_BITCMP0_B64
+ : AMDGPU::S_BITCMP1_B64;
BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
.add(*SrcOp)
break;
case AMDGPU::S_CMP_EQ_U32:
case AMDGPU::S_CMP_EQ_I32:
- case AMDGPU::S_CMP_GE_U32:
- case AMDGPU::S_CMP_GE_I32:
case AMDGPU::S_CMPK_EQ_U32:
case AMDGPU::S_CMPK_EQ_I32:
+ return optimizeCmpAnd(1, 32, true);
+ case AMDGPU::S_CMP_GE_U32:
+ case AMDGPU::S_CMP_GE_I32:
case AMDGPU::S_CMPK_GE_U32:
case AMDGPU::S_CMPK_GE_I32:
- return optimizeCmpAnd(1, 32);
+ return optimizeCmpAnd(1, 32, false);
case AMDGPU::S_CMP_EQ_U64:
- return optimizeCmpAnd(1, 64);
+ return optimizeCmpAnd(1, 64, true);
case AMDGPU::S_CMP_LG_U32:
case AMDGPU::S_CMP_LG_I32:
- case AMDGPU::S_CMP_GT_U32:
- case AMDGPU::S_CMP_GT_I32:
case AMDGPU::S_CMPK_LG_U32:
case AMDGPU::S_CMPK_LG_I32:
+ return optimizeCmpAnd(0, 32, true);
+ case AMDGPU::S_CMP_GT_U32:
+ case AMDGPU::S_CMP_GT_I32:
case AMDGPU::S_CMPK_GT_U32:
case AMDGPU::S_CMPK_GT_I32:
- return optimizeCmpAnd(0, 32);
+ return optimizeCmpAnd(0, 32, false);
case AMDGPU::S_CMP_LG_U64:
- return optimizeCmpAnd(0, 64);
+ return optimizeCmpAnd(0, 64, true);
}
return false;
...
---
-name: and_1_cmp_eq_0
+name: and_1_cmp_eq_2
body: |
- ; GCN-LABEL: name: and_1_cmp_eq_0
+ ; GCN-LABEL: name: and_1_cmp_eq_2
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
- ; GCN: S_CMP_EQ_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+ ; GCN: S_CMP_EQ_U32 killed [[S_AND_B32_]], 2, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
%0:sreg_32 = COPY $sgpr0
%1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
- S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+ S_CMP_EQ_U32 killed %1:sreg_32, 2, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit $scc
S_BRANCH %bb.1
S_ENDPGM 0
...
+
+---
+# Reversed form: S_CMP_EQ_U32 of (S_AND_B32 1, x) against 0, where the AND
+# result is used only by the compare. The CHECK lines expect the pair to be
+# replaced by a single S_BITCMP0_B32 on the copied source.
+name: and_1_cmp_eq_u32_0
+body: |
+ ; GCN-LABEL: name: and_1_cmp_eq_u32_0
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+# Reversed form: S_CMP_EQ_I32 of (S_AND_B32 1, x) against 0, where the AND
+# result is used only by the compare. The CHECK lines expect the pair to be
+# replaced by a single S_BITCMP0_B32 on the copied source.
+name: and_1_cmp_eq_i32_0
+body: |
+ ; GCN-LABEL: name: and_1_cmp_eq_i32_0
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_EQ_I32 killed %1:sreg_32, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+# Reversed 64-bit form: S_CMP_EQ_U64 of (S_AND_B64 1, x) against 0, where the
+# AND result is used only by the compare. The CHECK lines expect the pair to
+# be replaced by a single S_BITCMP0_B64 on the copied source.
+name: and_1_cmp_eq_u64_0
+body: |
+ ; GCN-LABEL: name: and_1_cmp_eq_u64_0
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GCN: S_BITCMP0_B64 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+
+ %0:sreg_64 = COPY $sgpr0_sgpr1
+ %1:sreg_64 = S_AND_B64 1, killed %0, implicit-def dead $scc
+ S_CMP_EQ_U64 killed %1:sreg_64, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+# Reversed form: S_CMP_LG_U32 of (S_AND_B32 1, x) against 1, where the AND
+# result is used only by the compare. The CHECK lines expect the pair to be
+# replaced by a single S_BITCMP0_B32 on the copied source.
+name: and_1_cmp_lg_u32_1
+body: |
+ ; GCN-LABEL: name: and_1_cmp_lg_u32_1
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_LG_U32 killed %1:sreg_32, 1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+# Reversed form: S_CMP_LG_I32 of (S_AND_B32 1, x) against 1, where the AND
+# result is used only by the compare. The CHECK lines expect the pair to be
+# replaced by a single S_BITCMP0_B32 on the copied source.
+name: and_1_cmp_lg_i32_1
+body: |
+ ; GCN-LABEL: name: and_1_cmp_lg_i32_1
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: S_BITCMP0_B32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_LG_I32 killed %1:sreg_32, 1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+# Reversed 64-bit form: S_CMP_LG_U64 of (S_AND_B64 1, x) against 1, where the
+# AND result is used only by the compare. The CHECK lines expect the pair to
+# be replaced by a single S_BITCMP0_B64 on the copied source.
+name: and_1_cmp_lg_u64_1
+body: |
+ ; GCN-LABEL: name: and_1_cmp_lg_u64_1
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GCN: S_BITCMP0_B64 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+
+ %0:sreg_64 = COPY $sgpr0_sgpr1
+ %1:sreg_64 = S_AND_B64 1, killed %0, implicit-def dead $scc
+ S_CMP_LG_U64 killed %1:sreg_64, 1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+# Negative test: same compare-against-0 shape as and_1_cmp_eq_u32_0, but the
+# AND result %1 is also read by the S_NOP in bb.1. The CHECK lines expect
+# both the S_AND_B32 and the S_CMP_EQ_U32 to remain unchanged.
+name: and_1_cmp_eq_u32_0_used_and
+body: |
+ ; GCN-LABEL: name: and_1_cmp_eq_u32_0_used_and
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+ ; GCN: S_CMP_EQ_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: S_NOP 0, implicit [[S_AND_B32_]]
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_EQ_U32 killed %1:sreg_32, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ S_NOP 0, implicit %1
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+# Negative test: S_CMP_GE_U32 of (S_AND_B32 1, x) against 0. The CHECK lines
+# expect no folding here: both the S_AND_B32 and the S_CMP_GE_U32 remain.
+name: and_1_cmp_ge_u32_0
+body: |
+ ; GCN-LABEL: name: and_1_cmp_ge_u32_0
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+ ; GCN: S_CMP_GE_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_GE_U32 killed %1:sreg_32, 0, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+# Negative test: S_CMP_GT_U32 of (S_AND_B32 1, x) against 1. The CHECK lines
+# expect no folding here: both the S_AND_B32 and the S_CMP_GT_U32 remain.
+name: and_1_cmp_gt_u32_1
+body: |
+ ; GCN-LABEL: name: and_1_cmp_gt_u32_1
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def dead $scc
+ ; GCN: S_CMP_GT_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_GT_U32 killed %1:sreg_32, 1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...