return DAG.getNode(Opc, SL, VT, Add1, Op2);
}
-SDValue SITargetLowering::reassociateSub(SDNode *N, SelectionDAG &DAG) const { // Tries to regroup an ADD/SUB node N that mixes divergent (v*) and non-divergent (s*) operands so the two non-divergent ones are combined with each other first; returns an empty SDValue when nothing applies.
- EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64) // only i32 and i64 results are handled
- return SDValue();
- // Nodes that isBaseWithConstantOffset() recognizes are left untouched.
- if (DAG.isBaseWithConstantOffset(SDValue(N, 0)))
- return SDValue();
-
- unsigned Opc = N->getOpcode();
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- // Proceed only when exactly one of the two operands is divergent.
- if (!(Op0->isDivergent() ^ Op1->isDivergent()))
- return SDValue();
-
- SDLoc SL(N);
- if (Op1->isDivergent() && Op1->hasOneUse()) { // divergent right-hand operand with a single use: N is s0 <Opc> (inner)
- unsigned Op1Opc = Op1.getOpcode();
- if (Op1Opc != ISD::ADD && Op1Opc != ISD::SUB)
- return SDValue();
- // From here on Op1 and Op2 name the inner node's first and second operand.
- SDValue Op2 = Op1.getOperand(1);
- Op1 = Op1.getOperand(0);
- if (Opc == ISD::ADD && Op1Opc == ISD::SUB) {
- // s0 + (s1 - v0) --> (s0 + s1) - v0
- if (!Op1->isDivergent() && Op2->isDivergent())
- return DAG.getNode(ISD::SUB, SL, VT,
- DAG.getNode(ISD::ADD, SL, VT, Op0, Op1), Op2);
- // s0 + (v0 - s1) --> (s0 - s1) + v0
- if (Op1->isDivergent() && !Op2->isDivergent())
- return DAG.getNode(ISD::ADD, SL, VT,
- DAG.getNode(ISD::SUB, SL, VT, Op0, Op2), Op1);
- } else if (Opc == ISD::SUB) {
- if (Op1Opc == ISD::SUB) {
- // s0 - (s1 - v0) --> (s0 - s1) + v0
- if (!Op1->isDivergent() && Op2->isDivergent())
- return DAG.getNode(ISD::ADD, SL, VT,
- DAG.getNode(ISD::SUB, SL, VT, Op0, Op1), Op2);
- // s0 - (v0 - s1) --> (s0 + s1) - v0
- if (Op1->isDivergent() && !Op2->isDivergent())
- return DAG.getNode(ISD::SUB, SL, VT,
- DAG.getNode(ISD::ADD, SL, VT, Op0, Op2), Op1);
- } else if (Op1Opc == ISD::ADD) {
- // s0 - (s1 + v0) --> (s0 - s1) - v0
- if (Op1->isDivergent() ^ Op2->isDivergent()) { // exactly one inner operand is divergent
- if (Op1->isDivergent())
- std::swap(Op1, Op2); // make Op1 the non-divergent inner operand, so either inner operand order is accepted
- return DAG.getNode(ISD::SUB, SL, VT,
- DAG.getNode(ISD::SUB, SL, VT, Op0, Op1), Op2);
- }
- }
- }
- }
- // Mirror case: the divergent, single-use operand is on the left, N is (inner) <Opc> s0. This cannot fire after the block above ran, since Op0 is non-divergent whenever the original Op1 was divergent.
- if (Op0->isDivergent() && Op0->hasOneUse()) {
- unsigned Op0Opc = Op0.getOpcode();
- if (Op0Opc != ISD::ADD && Op0Opc != ISD::SUB)
- return SDValue();
- // From here on Op0 and Op2 name the inner node's first and second operand.
- SDValue Op2 = Op0.getOperand(1);
- Op0 = Op0.getOperand(0);
- if (!Op0->isDivergent() && Op2->isDivergent()) { // inner is (s1 <Op0Opc> v0)
- if (Opc == ISD::SUB) {
- // (s1 + v0) - s0 --> (s1 - s0) + v0
- if (Op0Opc == ISD::ADD)
- return DAG.getNode(ISD::ADD, SL, VT,
- DAG.getNode(ISD::SUB, SL, VT, Op0, Op1), Op2);
-
- // (s1 - v0) - s0 --> (s1 - s0) - v0
- if (Op0Opc == ISD::SUB)
- return DAG.getNode(ISD::SUB, SL, VT,
- DAG.getNode(ISD::SUB, SL, VT, Op0, Op1), Op2);
- } else if (Opc == ISD::ADD && Op0Opc == ISD::SUB) {
- // (s1 - v0) + s0 --> (s0 + s1) - v0
- return DAG.getNode(ISD::SUB, SL, VT,
- DAG.getNode(ISD::ADD, SL, VT, Op0, Op1), Op2);
- }
- }
-
- if (Op0->isDivergent() && !Op2->isDivergent()) { // inner is (v0 <Op0Opc> s1); only the ADD-of-SUB shape is rewritten here
- // (v0 - s1) + s0 --> (s0 - s1) + v0
- if (Opc == ISD::ADD && Op0Opc == ISD::SUB)
- return DAG.getNode(ISD::ADD, SL, VT,
- DAG.getNode(ISD::SUB, SL, VT, Op1, Op2), Op0);
- }
- }
- // No pattern matched.
- return SDValue();
-}
-
static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL,
EVT VT,
SDValue N0, SDValue N1, SDValue N2,
return V;
}
- if (SDValue V = reassociateSub(N, DAG))
- return V;
-
if (VT != MVT::i32 || !DCI.isAfterLegalizeDAG())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
- if (SDValue V = reassociateSub(N, DAG))
- return V;
-
if (VT != MVT::i32)
return SDValue();
+++ /dev/null
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
-
-; s0 + (s1 - v0) --> (s0 + s1) - v0
-define amdgpu_kernel void @reassoc_sub_add_s0s1v0_i32(ptr addrspace(1) %arg, i32 %x, i32 %y) {
-; GFX8-LABEL: reassoc_sub_add_s0s1v0_i32:
-; GFX8: ; %bb.0: ; %bb
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_add_i32 s2, s3, s2
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s2, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: flat_store_dword v[0:1], v2
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: reassoc_sub_add_s0s1v0_i32:
-; GFX9: ; %bb.0: ; %bb
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_add_i32 s2, s3, s2
-; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9-NEXT: s_endpgm
-bb:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() ; v0 in the pattern
- %sub = sub i32 %x, %tid ; s1 - v0 (%x plays s1)
- %add = add i32 %y, %sub ; s0 + (s1 - v0) (%y plays s0)
- store i32 %add, ptr addrspace(1) %arg, align 4
- ret void
-}
-
-; s0 + (v0 - s1) --> (s0 - s1) + v0
-define amdgpu_kernel void @reassoc_sub_add_s0v0s1_i32(ptr addrspace(1) %arg, i32 %x, i32 %y) {
-; GFX8-LABEL: reassoc_sub_add_s0v0s1_i32:
-; GFX8: ; %bb.0: ; %bb
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_sub_i32 s2, s3, s2
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: flat_store_dword v[0:1], v2
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: reassoc_sub_add_s0v0s1_i32:
-; GFX9: ; %bb.0: ; %bb
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_sub_i32 s2, s3, s2
-; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9-NEXT: s_endpgm
-bb:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() ; v0 in the pattern
- %sub = sub i32 %tid, %x ; v0 - s1 (%x plays s1)
- %add = add i32 %y, %sub ; s0 + (v0 - s1) (%y plays s0)
- store i32 %add, ptr addrspace(1) %arg, align 4
- ret void
-}
-
-; (v0 - s1) + s0 --> (s0 - s1) + v0
-define amdgpu_kernel void @reassoc_sub_add_v0s1s0_i32(ptr addrspace(1) %arg, i32 %x, i32 %y) {
-; GFX8-LABEL: reassoc_sub_add_v0s1s0_i32:
-; GFX8: ; %bb.0: ; %bb
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_sub_i32 s2, s3, s2
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: flat_store_dword v[0:1], v2
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: reassoc_sub_add_v0s1s0_i32:
-; GFX9: ; %bb.0: ; %bb
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_sub_i32 s2, s3, s2
-; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9-NEXT: s_endpgm
-bb:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() ; v0 in the pattern
- %sub = sub i32 %tid, %x ; v0 - s1 (%x plays s1)
- %add = add i32 %sub, %y ; (v0 - s1) + s0 (%y plays s0)
- store i32 %add, ptr addrspace(1) %arg, align 4
- ret void
-}
-
-; s0 - (s1 - v0) --> (s0 - s1) + v0
-define amdgpu_kernel void @reassoc_sub_sub_s0s1v0_i32(ptr addrspace(1) %arg, i32 %x, i32 %y) {
-; GFX8-LABEL: reassoc_sub_sub_s0s1v0_i32:
-; GFX8: ; %bb.0: ; %bb
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_sub_i32 s2, s3, s2
-; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: flat_store_dword v[0:1], v2
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: reassoc_sub_sub_s0s1v0_i32:
-; GFX9: ; %bb.0: ; %bb
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_sub_i32 s2, s3, s2
-; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9-NEXT: s_endpgm
-bb:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() ; v0 in the pattern
- %sub1 = sub i32 %x, %tid ; s1 - v0 (%x plays s1)
- %sub2 = sub i32 %y, %sub1 ; s0 - (s1 - v0) (%y plays s0)
- store i32 %sub2, ptr addrspace(1) %arg, align 4
- ret void
-}
-
-; s0 - (v0 - s1) --> (s0 + s1) - v0
-define amdgpu_kernel void @reassoc_sub_sub_s0v0s1_i32(ptr addrspace(1) %arg, i32 %x, i32 %y) {
-; GFX8-LABEL: reassoc_sub_sub_s0v0s1_i32:
-; GFX8: ; %bb.0: ; %bb
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_add_i32 s2, s3, s2
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s2, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: flat_store_dword v[0:1], v2
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: reassoc_sub_sub_s0v0s1_i32:
-; GFX9: ; %bb.0: ; %bb
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_add_i32 s2, s3, s2
-; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9-NEXT: s_endpgm
-bb:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() ; v0 in the pattern
- %sub1 = sub i32 %tid, %x ; v0 - s1 (%x plays s1)
- %sub2 = sub i32 %y, %sub1 ; s0 - (v0 - s1) (%y plays s0)
- store i32 %sub2, ptr addrspace(1) %arg, align 4
- ret void
-}
-
-; s0 - (s1 + v0) --> (s0 - s1) - v0
-define amdgpu_kernel void @reassoc_add_sub_s0s1v0_i32(ptr addrspace(1) %arg, i32 %x, i32 %y) {
-; GFX8-LABEL: reassoc_add_sub_s0s1v0_i32:
-; GFX8: ; %bb.0: ; %bb
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_sub_i32 s2, s3, s2
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s2, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: flat_store_dword v[0:1], v2
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: reassoc_add_sub_s0s1v0_i32:
-; GFX9: ; %bb.0: ; %bb
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_sub_i32 s2, s3, s2
-; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9-NEXT: s_endpgm
-bb:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() ; v0 in the pattern
- %add = add i32 %x, %tid ; s1 + v0 (%x plays s1)
- %sub = sub i32 %y, %add ; s0 - (s1 + v0) (%y plays s0)
- store i32 %sub, ptr addrspace(1) %arg, align 4
- ret void
-}
-
-; s0 - (v0 + s1) --> (s0 - s1) - v0
-define amdgpu_kernel void @reassoc_add_sub_s0v0s1_i32(ptr addrspace(1) %arg, i32 %x, i32 %y) {
-; GFX8-LABEL: reassoc_add_sub_s0v0s1_i32:
-; GFX8: ; %bb.0: ; %bb
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_sub_i32 s2, s3, s2
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s2, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: flat_store_dword v[0:1], v2
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: reassoc_add_sub_s0v0s1_i32:
-; GFX9: ; %bb.0: ; %bb
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_sub_i32 s2, s3, s2
-; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9-NEXT: s_endpgm
-bb:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() ; v0 in the pattern
- %add = add i32 %tid, %x ; v0 + s1 (%x plays s1)
- %sub = sub i32 %y, %add ; s0 - (v0 + s1) (%y plays s0)
- store i32 %sub, ptr addrspace(1) %arg, align 4
- ret void
-}
-
-; (s1 + v0) - s0 --> (s1 - s0) + v0
-define amdgpu_kernel void @reassoc_add_sub_s1v0s0_i32(ptr addrspace(1) %arg, i32 %x, i32 %y) {
-; GFX8-LABEL: reassoc_add_sub_s1v0s0_i32:
-; GFX8: ; %bb.0: ; %bb
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_sub_i32 s2, s3, s2
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s2, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: flat_store_dword v[0:1], v2
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: reassoc_add_sub_s1v0s0_i32:
-; GFX9: ; %bb.0: ; %bb
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_sub_i32 s2, s3, s2
-; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9-NEXT: s_endpgm
-bb:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() ; v0 in the pattern
- %add = add i32 %tid, %x ; v0 + %x
- %sub = sub i32 %y, %add ; %y - (v0 + %x). NOTE(review): this body is identical to reassoc_add_sub_s0v0s1_i32; the (s1 + v0) - s0 form named in the comment above this function would be "sub i32 %add, %y" -- confirm intent
- store i32 %sub, ptr addrspace(1) %arg, align 4
- ret void
-}
-
-; (s1 - v0) - s0 --> (s1 - s0) - v0
-define amdgpu_kernel void @reassoc_sub_sub_s1v0s0_i32(ptr addrspace(1) %arg, i32 %x, i32 %y) {
-; GFX8-LABEL: reassoc_sub_sub_s1v0s0_i32:
-; GFX8: ; %bb.0: ; %bb
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_sub_i32 s2, s2, s3
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s2, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: flat_store_dword v[0:1], v2
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: reassoc_sub_sub_s1v0s0_i32:
-; GFX9: ; %bb.0: ; %bb
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_sub_i32 s2, s2, s3
-; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9-NEXT: s_endpgm
-bb:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() ; v0 in the pattern
- %sub1 = sub i32 %x, %tid ; s1 - v0 (%x plays s1)
- %sub2 = sub i32 %sub1, %y ; (s1 - v0) - s0 (%y plays s0)
- store i32 %sub2, ptr addrspace(1) %arg, align 4
- ret void
-}
-
-; (s1 - v0) + s0 --> (s0 + s1) - v0
-define amdgpu_kernel void @reassoc_sub_add_s1v0s0_i32(ptr addrspace(1) %arg, i32 %x, i32 %y) {
-; GFX8-LABEL: reassoc_sub_add_s1v0s0_i32:
-; GFX8: ; %bb.0: ; %bb
-; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: s_add_i32 s2, s2, s3
-; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s2, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: flat_store_dword v[0:1], v2
-; GFX8-NEXT: s_endpgm
-;
-; GFX9-LABEL: reassoc_sub_add_s1v0s0_i32:
-; GFX9: ; %bb.0: ; %bb
-; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_add_i32 s2, s2, s3
-; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9-NEXT: s_endpgm
-bb:
- %tid = tail call i32 @llvm.amdgcn.workitem.id.x() ; v0 in the pattern
- %sub = sub i32 %x, %tid ; s1 - v0 (%x plays s1)
- %add = add i32 %sub, %y ; (s1 - v0) + s0 (%y plays s0)
- store i32 %add, ptr addrspace(1) %arg, align 4
- ret void
-}
-
-declare i32 @llvm.amdgcn.workitem.id.x()