return false;
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
- this](int64_t ExpectedValue) -> bool {
+ this](int64_t ExpectedValue,
+ unsigned SrcSize) -> bool {
// s_cmp_eq_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
// s_cmp_eq_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
// s_cmp_ge_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
// s_cmp_gt_u32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
// s_cmp_gt_i32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
// s_cmp_lg_u64 (s_and_b64 $src, 1), 0 => s_and_b64 $src, 1
-
- // TODO: Fold this into s_bitcmp* if result of an AND is unused.
- // TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
- // process any power of 2.
+ //
+ // If the result of the AND is unused except in the compare:
+ // s_and_b(32|64) $src, 1 => s_bitcmp1_b(32|64) $src, 0
if (CmpValue != ExpectedValue)
return false;
Def->getOpcode() != AMDGPU::S_AND_B64)
return false;
- if ((!Def->getOperand(1).isImm() || Def->getOperand(1).getImm() != 1) &&
- (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1))
+ MachineOperand *SrcOp = &Def->getOperand(1);
+ if (SrcOp->isImm() && SrcOp->getImm() == 1)
+ SrcOp = &Def->getOperand(2);
+ else if (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1)
return false;
for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
SccDef->setIsDead(false);
CmpInstr.eraseFromParent();
+ if (!MRI->use_nodbg_empty(Def->getOperand(0).getReg()))
+ return true;
+
+ // Replace an AND whose result is unused with an S_BITCMP.
+ // TODO: If s_bitcmp can be used, we are not limited to 1 and 0 but can
+ // process any power of 2.
+ MachineBasicBlock *MBB = Def->getParent();
+
+ // TODO: Reversed conditions can use S_BITCMP0_*.
+ unsigned NewOpc = (SrcSize == 32) ? AMDGPU::S_BITCMP1_B32
+ : AMDGPU::S_BITCMP1_B64;
+
+ BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
+ .add(*SrcOp)
+ .addImm(0);
+ Def->eraseFromParent();
+
return true;
};
case AMDGPU::S_CMP_EQ_I32:
case AMDGPU::S_CMP_GE_U32:
case AMDGPU::S_CMP_GE_I32:
- case AMDGPU::S_CMP_EQ_U64:
case AMDGPU::S_CMPK_EQ_U32:
case AMDGPU::S_CMPK_EQ_I32:
case AMDGPU::S_CMPK_GE_U32:
case AMDGPU::S_CMPK_GE_I32:
- return optimizeCmpAnd(1);
+ return optimizeCmpAnd(1, 32);
+ case AMDGPU::S_CMP_EQ_U64:
+ return optimizeCmpAnd(1, 64);
case AMDGPU::S_CMP_LG_U32:
case AMDGPU::S_CMP_LG_I32:
case AMDGPU::S_CMP_GT_U32:
case AMDGPU::S_CMP_GT_I32:
- case AMDGPU::S_CMP_LG_U64:
case AMDGPU::S_CMPK_LG_U32:
case AMDGPU::S_CMPK_LG_I32:
case AMDGPU::S_CMPK_GT_U32:
case AMDGPU::S_CMPK_GT_I32:
- return optimizeCmpAnd(0);
+ return optimizeCmpAnd(0, 32);
+ case AMDGPU::S_CMP_LG_U64:
+ return optimizeCmpAnd(0, 64);
}
return false;
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = COPY $sgpr0
+ %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_CMP_EQ_U32 killed %1:sreg_32, 1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...
+
+---
+name: and_1_cmp_eq_u32_1_used_and
+body: |
+ ; GCN-LABEL: name: and_1_cmp_eq_u32_1_used_and
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_NOP 0, implicit [[S_AND_B32_]]
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
%0:sreg_32 = COPY $sgpr0
%1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+ S_NOP 0, implicit %1
S_CMP_EQ_U32 killed %1:sreg_32, 1, implicit-def $scc
S_CBRANCH_SCC0 %bb.2, implicit $scc
S_BRANCH %bb.1
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[COPY]], 1, implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B64 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
- ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B64 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+ ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
S_ENDPGM 0
...
+
+---
+name: and_1_cmp_eq_u32_1_imm_src
+body: |
+ ; GCN-LABEL: name: and_1_cmp_eq_u32_1_imm_src
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN: S_BITCMP1_B32 11, 0, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: S_BRANCH %bb.1
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: bb.2:
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $sgpr0, $vgpr0_vgpr1
+
+ %0:sreg_32 = S_AND_B32 1, 11, implicit-def dead $scc
+ S_CMP_EQ_U32 killed %0:sreg_32, 1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.2, implicit $scc
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ bb.2:
+ S_ENDPGM 0
+
+...