// sub x, zext (setcc) => subcarry x, 0, setcc
// sub x, sext (setcc) => addcarry x, 0, setcc
-
- bool Commuted = false;
- unsigned Opc = LHS.getOpcode();
- if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND ||
- Opc == ISD::ANY_EXTEND) {
- std::swap(RHS, LHS);
- Commuted = true;
- }
-
- Opc = RHS.getOpcode();
+ unsigned Opc = RHS.getOpcode();
switch (Opc) {
default: break;
case ISD::ZERO_EXTEND:
if (!isBoolSGPR(Cond))
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
- SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- SDValue Args[3];
- Args[2] = Cond;
-
- if (Commuted) {
- // sub zext (setcc), x => addcarry 0, x, setcc
- // sub sext (setcc), x => subcarry 0, x, setcc
- Args[0] = Zero;
- Args[1] = LHS;
- Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::SUBCARRY : ISD::ADDCARRY;
- } else {
- Args[0] = LHS;
- Args[1] = Zero;
- Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
- }
-
+ SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
+ Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
return DAG.getNode(Opc, SL, VTList, Args);
}
}
; GCN-LABEL: {{^}}sub_sube_commuted:
; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DAG: buffer_load_dword [[V:v[0-9]+]],
-; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
-; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADDC]]
+; GCN: v_cndmask_b32_e64 [[CCZEXT:v[0-9]+]], 0, 1, [[CC]]
+; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, [[CCZEXT]], v4
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, {{.*}}, [[SUB]]
+; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADD]]
; GFX9-LABEL: {{^}}sub_sube_commuted:
; GFX9-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9-DAG: global_load_dword [[V:v[0-9]+]],
-; GFX9: v_addc_co_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
-; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[ADDC]]
+; GFX9-DAG: v_cndmask_b32_e64 [[CCZEXT:v[0-9]+]], 0, 1, [[CC]]
+; GFX9: v_sub_u32_e32 {{.*}}, [[CCZEXT]]
+; GFX9: v_add_u32_e32
+; GFX9: v_add_u32_e32 {{.*}}, 0x64,
define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
; Check case where sub is commuted with zext
; GCN-LABEL: {{^}}sub_zext_setcc_commute:
; GCN: v_cmp_gt_u32_e32 vcc, v
-; GCN-NOT: vcc
-; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
-; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADDC]]
+; GCN: v_cndmask
+; GCN: v_sub_i32_e32
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc,
+; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADD]]
define amdgpu_kernel void @sub_zext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) {
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
; Check case where sub is commuted with sext
; GCN-LABEL: {{^}}sub_sext_setcc_commute:
; GCN: v_cmp_gt_u32_e32 vcc, v
-; GCN-NOT: vcc
-; GCN: v_subb_u32_e32 [[SUBB:v[0-9]+]], vcc, 0, v{{[0-9]+}}, vcc
-; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]]
+; GCN: v_cndmask
+; GCN: v_sub_i32_e32
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc,
; GCN: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, s{{[0-9]+}}, [[ADD]]
define amdgpu_kernel void @sub_sext_setcc_commute(i32 addrspace(1)* nocapture %arg, i32 %a, i32%b) {
bb:
--- /dev/null
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs <%s | FileCheck -check-prefixes=GCN %s
+;
+; This test checks that we have the correct fold for zext(cc1) - zext(cc2).
+;
+; GCN-LABEL: sub_zext_zext:
+; GCN: ds_read_b32 [[VAL:v[0-9]+]],
+; GCN-DAG: v_cmp_lt_f32{{.*}} [[CC1:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; GCN-DAG: v_cmp_gt_f32{{.*}} vcc, 0, [[VAL]]
+; GCN: v_cndmask_{{.*}} [[ZEXTCC1:v[0-9]+]], 0, 1, [[CC1]]
+; GCN: v_subbrev{{.*}} {{v[0-9]+}}, vcc, 0, [[ZEXTCC1]], vcc
+;
+; Before the reversion that this test is attached to, the compiler commuted
+; the operands to the sub and used different logic to select the addc/subc
+; instruction:
+; sub zext (setcc), x => addcarry 0, x, setcc
+; sub sext (setcc), x => subcarry 0, x, setcc
+;
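+; ... but that is bogus: for example, "sub zext (setcc), x" computes
+; setcc - x, whereas "addcarry 0, x, setcc" computes x + setcc. I believe it
+; is not possible to fold those commuted patterns into any form of addcarry or
+; subcarry.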
+
+; Loads one float from addrspace(3) at address null, compares it against 0.0
+; with both "ogt" and "olt", zero-extends the two i1 results to i32, subtracts
+; them, and returns the difference converted to float.
+define amdgpu_cs float @sub_zext_zext() {
+.entry:
+
+ %t519 = load float, float addrspace(3)* null
+
+ %t524 = fcmp ogt float %t519, 0.000000e+00
+ %t525 = fcmp olt float %t519, 0.000000e+00
+ %t526 = zext i1 %t524 to i32
+ %t527 = zext i1 %t525 to i32
+ ; Both operands of the sub are zext of a compare result; this is the
+ ; zext(cc1) - zext(cc2) shape the test is checking.
+ %t528 = sub nsw i32 %t526, %t527
+ %t529 = sitofp i32 %t528 to float
+ ret float %t529
+}
+