From 83f0bcef7a3e96d022f8d31fd87c8363fd4f9a00 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 29 Jan 2015 16:55:25 +0000 Subject: [PATCH] R600/SI: Define a schedule model and enable the generic machine scheduler The schedule model is not complete yet, and could be improved. llvm-svn: 227461 --- llvm/lib/Target/R600/AMDGPUSubtarget.cpp | 19 ++++++++ llvm/lib/Target/R600/AMDGPUSubtarget.h | 14 +++++- llvm/lib/Target/R600/SIRegisterInfo.cpp | 55 ++++++++++++++++++++-- llvm/lib/Target/R600/SIRegisterInfo.h | 12 ++++- llvm/test/CodeGen/R600/address-space.ll | 3 +- llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll | 6 +-- llvm/test/CodeGen/R600/ctpop.ll | 4 +- llvm/test/CodeGen/R600/cvt_f32_ubyte.ll | 6 +-- llvm/test/CodeGen/R600/ds_read2st64.ll | 4 +- llvm/test/CodeGen/R600/fceil64.ll | 6 +-- llvm/test/CodeGen/R600/ffloor.f64.ll | 6 +-- llvm/test/CodeGen/R600/ffloor.ll | 2 - llvm/test/CodeGen/R600/fmax3.ll | 6 +-- llvm/test/CodeGen/R600/fmin3.ll | 6 +-- llvm/test/CodeGen/R600/fneg-fabs.f64.ll | 2 +- llvm/test/CodeGen/R600/ftrunc.f64.ll | 4 +- llvm/test/CodeGen/R600/imm.ll | 34 ++++++------- llvm/test/CodeGen/R600/llvm.memcpy.ll | 34 ++++++------- llvm/test/CodeGen/R600/llvm.round.f64.ll | 2 +- llvm/test/CodeGen/R600/llvm.round.ll | 4 +- llvm/test/CodeGen/R600/local-atomics.ll | 4 +- llvm/test/CodeGen/R600/local-atomics64.ll | 6 +-- llvm/test/CodeGen/R600/local-memory-two-objects.ll | 4 +- llvm/test/CodeGen/R600/or.ll | 2 +- .../CodeGen/R600/si-triv-disjoint-mem-access.ll | 2 +- llvm/test/CodeGen/R600/smrd.ll | 10 ++-- llvm/test/CodeGen/R600/trunc.ll | 2 + llvm/test/CodeGen/R600/udivrem.ll | 24 ++-------- llvm/test/CodeGen/R600/valu-i1.ll | 3 +- llvm/test/CodeGen/R600/wait.ll | 5 +- llvm/test/CodeGen/R600/xor.ll | 2 +- llvm/test/CodeGen/R600/zero_extend.ll | 2 +- 32 files changed, 185 insertions(+), 110 deletions(-) diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp index 39cc383..541dbab 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp @@ -20,6 +20,7 @@ #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/MachineScheduler.h" using namespace llvm; @@ -111,3 +112,21 @@ bool AMDGPUSubtarget::isVGPRSpillingEnabled( const SIMachineFunctionInfo *MFI) const { return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling; } + +void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, + MachineInstr *end, + unsigned NumRegionInstrs) const { + if (getGeneration() >= SOUTHERN_ISLANDS) { + + // Track register pressure so the scheduler can try to decrease + // pressure once register usage is above the threshold defined by + // SIRegisterInfo::getRegPressureSetLimit() + Policy.ShouldTrackPressure = true; + + // Enabling both top down and bottom up scheduling seems to give us fewer + // register spills than just using one of these approaches on its own. 
+ Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = false; + } +} diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.h b/llvm/lib/Target/R600/AMDGPUSubtarget.h index d639f7c..389cc8c 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.h +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.h @@ -204,9 +204,13 @@ public: unsigned getAmdKernelCodeChipID() const; bool enableMachineScheduler() const override { - return getGeneration() <= NORTHERN_ISLANDS; + return true; } + void overrideSchedPolicy(MachineSchedPolicy &Policy, + MachineInstr *begin, MachineInstr *end, + unsigned NumRegionInstrs) const override; + // Helper functions to simplify if statements bool isTargetELF() const { return false; @@ -226,6 +230,14 @@ public: return TargetTriple.getOS() == Triple::AMDHSA; } bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const; + + unsigned getMaxWavesPerCU() const { + if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 10; + + // FIXME: Not sure what this is for other subtargets. + llvm_unreachable("do not know max waves per CU for this subtarget."); + } }; } // End namespace llvm diff --git a/llvm/lib/Target/R600/SIRegisterInfo.cpp b/llvm/lib/Target/R600/SIRegisterInfo.cpp index 380c98b..122e30c 100644 --- a/llvm/lib/Target/R600/SIRegisterInfo.cpp +++ b/llvm/lib/Target/R600/SIRegisterInfo.cpp @@ -51,9 +51,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - return RC->getNumRegs(); +unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const { + + // FIXME: We should adjust the max number of waves based on LDS size. + unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU()); + unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU()); + + for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I) { + + unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1); + unsigned Limit; + + if (isSGPRClass(*I)) { + Limit = SGPRLimit / NumSubRegs; + } else { + Limit = VGPRLimit / NumSubRegs; + } + + const int *Sets = getRegClassPressureSets(*I); + assert(Sets); + for (unsigned i = 0; Sets[i] != -1; ++i) { + if (Sets[i] == (int)Idx) + return Limit; + } + } + return 256; } bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const { @@ -455,3 +478,29 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, return AMDGPU::NoRegister; } +unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const { + switch(WaveCount) { + case 10: return 24; + case 9: return 28; + case 8: return 32; + case 7: return 36; + case 6: return 40; + case 5: return 48; + case 4: return 64; + case 3: return 84; + case 2: return 128; + default: return 256; + } +} + +unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const { + switch(WaveCount) { + case 10: return 48; + case 9: return 56; + case 8: return 64; + case 7: return 72; + case 6: return 80; + case 5: return 96; + default: return 103; + } +} diff --git a/llvm/lib/Target/R600/SIRegisterInfo.h b/llvm/lib/Target/R600/SIRegisterInfo.h index 8aa02c3..d908ffd 100644 --- a/llvm/lib/Target/R600/SIRegisterInfo.h +++ b/llvm/lib/Target/R600/SIRegisterInfo.h @@ -17,6 +17,7 @@ #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" +#include "llvm/Support/Debug.h" namespace llvm { @@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { BitVector getReservedRegs(const 
MachineFunction &MF) const override; - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const override; + unsigned getRegPressureSetLimit(unsigned Idx) const override; bool requiresRegisterScavenging(const MachineFunction &Fn) const override; @@ -105,6 +105,14 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { unsigned getPreloadedValue(const MachineFunction &MF, enum PreloadedValue Value) const; + /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount + /// concurrent waves. + unsigned getNumVGPRsAllowed(unsigned WaveCount) const; + + /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount + /// concurrent waves. + unsigned getNumSGPRsAllowed(unsigned WaveCount) const; + unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC) const; diff --git a/llvm/test/CodeGen/R600/address-space.ll b/llvm/test/CodeGen/R600/address-space.ll index aaa0628..74ea9f0 100644 --- a/llvm/test/CodeGen/R600/address-space.ll +++ b/llvm/test/CodeGen/R600/address-space.ll @@ -10,9 +10,10 @@ ; CHECK-LABEL: {{^}}do_as_ptr_calcs: ; CHECK: s_load_dword [[SREG1:s[0-9]+]], +; CHECK: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG1]] ; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]] ; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12 -; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:20 +; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG2]] offset:20 define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind { entry: %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 diff --git a/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll b/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll index 0d5ece4..bbca758 100644 --- a/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll +++ b/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll @@ -2,9 +2,9 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset: +; SI: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 ; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] ; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0] @@ -18,10 +18,10 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs } ; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd ; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7 ; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0 +; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd ; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] ; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] diff --git a/llvm/test/CodeGen/R600/ctpop.ll b/llvm/test/CodeGen/R600/ctpop.ll index c64f443..b0a9996 100644 --- a/llvm/test/CodeGen/R600/ctpop.ll +++ b/llvm/test/CodeGen/R600/ctpop.ll @@ -37,10 +37,10 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali } ; FUNC-LABEL: 
{{^}}v_ctpop_add_chain_i32: -; SI: buffer_load_dword [[VAL0:v[0-9]+]], ; SI: buffer_load_dword [[VAL1:v[0-9]+]], +; SI: buffer_load_dword [[VAL0:v[0-9]+]], ; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 -; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] +; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm diff --git a/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll b/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll index afcc95e..710a400 100644 --- a/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll @@ -63,10 +63,10 @@ define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> ; position in the word for the component. ; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned: -; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]] -; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]] -; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]] ; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]] +; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]] +; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]] +; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]] ; SI-NOT: v_lshlrev_b32 ; SI-NOT: v_or_b32 diff --git a/llvm/test/CodeGen/R600/ds_read2st64.ll b/llvm/test/CodeGen/R600/ds_read2st64.ll index 24834af..efd875e 100644 --- a/llvm/test/CodeGen/R600/ds_read2st64.ll +++ b/llvm/test/CodeGen/R600/ds_read2st64.ll @@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add ; SI-LABEL: @simple_read2st64_f32_over_max_offset ; SI-NOT: ds_read2st64_b32 -; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { @@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a ; SI-LABEL: @simple_read2st64_f64_over_max_offset ; SI-NOT: ds_read2st64_b64 -; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] ; SI: s_endpgm define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { diff --git a/llvm/test/CodeGen/R600/fceil64.ll b/llvm/test/CodeGen/R600/fceil64.ll index 77cd8ea..19396aa 100644 --- a/llvm/test/CodeGen/R600/fceil64.ll +++ b/llvm/test/CodeGen/R600/fceil64.ll @@ -11,19 +11,19 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; FUNC-LABEL: {{^}}fceil_f64: ; CI: v_ceil_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 ; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 ; SI: s_lshr_b64 ; SI: s_not_b64 ; SI: s_and_b64 -; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI-DAG: cmp_lt_i32 +; SI: cmp_lt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 ; SI: cmp_gt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 -; SI: v_cmp_lg_f64 ; SI: v_cmp_gt_f64 +; SI: v_cmp_lg_f64 ; SI: s_and_b64 ; SI: v_cndmask_b32 ; SI: v_cndmask_b32 diff --git a/llvm/test/CodeGen/R600/ffloor.f64.ll b/llvm/test/CodeGen/R600/ffloor.f64.ll index 194d0aa..a63568e 100644 --- a/llvm/test/CodeGen/R600/ffloor.f64.ll +++ b/llvm/test/CodeGen/R600/ffloor.f64.ll @@ -12,19 +12,19 @@ declare <16 x double> 
@llvm.floor.v16f64(<16 x double>) nounwind readnone ; CI: v_floor_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 ; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 ; SI: s_lshr_b64 ; SI: s_not_b64 ; SI: s_and_b64 -; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI-DAG: cmp_lt_i32 +; SI: cmp_lt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 ; SI: cmp_gt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 -; SI: v_cmp_lg_f64 ; SI: v_cmp_lt_f64 +; SI: v_cmp_lg_f64 ; SI: s_and_b64 ; SI: v_cndmask_b32 ; SI: v_cndmask_b32 diff --git a/llvm/test/CodeGen/R600/ffloor.ll b/llvm/test/CodeGen/R600/ffloor.ll index e235d02..61c46ac 100644 --- a/llvm/test/CodeGen/R600/ffloor.ll +++ b/llvm/test/CodeGen/R600/ffloor.ll @@ -15,8 +15,6 @@ define void @floor_f32(float addrspace(1)* %out, float %in) { ; SI: v_floor_f32_e32 ; SI: v_floor_f32_e32 -; R600: FLOOR -; R600: FLOOR define void @floor_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { %tmp = call <2 x float> @llvm.floor.v2f32(<2 x float> %in) #0 store <2 x float> %tmp, <2 x float> addrspace(1)* %out diff --git a/llvm/test/CodeGen/R600/fmax3.ll b/llvm/test/CodeGen/R600/fmax3.ll index e1b477c..629c032 100644 --- a/llvm/test/CodeGen/R600/fmax3.ll +++ b/llvm/test/CodeGen/R600/fmax3.ll @@ -4,9 +4,9 @@ declare float @llvm.maxnum.f32(float, float) nounwind readnone ; SI-LABEL: {{^}}test_fmax3_olt_0: -; SI: buffer_load_dword [[REGA:v[0-9]+]] -; SI: buffer_load_dword [[REGB:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] +; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -22,8 +22,8 @@ define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %apt ; Commute operand of second fmax ; SI-LABEL: {{^}}test_fmax3_olt_1: -; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] ; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], diff --git a/llvm/test/CodeGen/R600/fmin3.ll b/llvm/test/CodeGen/R600/fmin3.ll index 716beb1..e3acb31 100644 --- a/llvm/test/CodeGen/R600/fmin3.ll +++ b/llvm/test/CodeGen/R600/fmin3.ll @@ -5,9 +5,9 @@ declare float @llvm.minnum.f32(float, float) nounwind readnone ; SI-LABEL: {{^}}test_fmin3_olt_0: -; SI: buffer_load_dword [[REGA:v[0-9]+]] -; SI: buffer_load_dword [[REGB:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] +; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm @@ -23,8 +23,8 @@ define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %apt ; Commute operand of second fmin ; SI-LABEL: {{^}}test_fmin3_olt_1: -; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGB:v[0-9]+]] +; SI: buffer_load_dword [[REGA:v[0-9]+]] ; SI: buffer_load_dword [[REGC:v[0-9]+]] ; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] ; SI: buffer_store_dword [[RESULT]], diff --git a/llvm/test/CodeGen/R600/fneg-fabs.f64.ll b/llvm/test/CodeGen/R600/fneg-fabs.f64.ll index 7430e7f..ee9f82d 100644 --- a/llvm/test/CodeGen/R600/fneg-fabs.f64.ll +++ b/llvm/test/CodeGen/R600/fneg-fabs.f64.ll @@ -57,8 +57,8 @@ define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) { } 
; FUNC-LABEL: {{^}}fneg_fabs_f64: -; SI: s_load_dwordx2 ; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}} +; SI: s_load_dwordx2 ; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 ; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]] ; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]] diff --git a/llvm/test/CodeGen/R600/ftrunc.f64.ll b/llvm/test/CodeGen/R600/ftrunc.f64.ll index 2c7217e..faf9b40 100644 --- a/llvm/test/CodeGen/R600/ftrunc.f64.ll +++ b/llvm/test/CodeGen/R600/ftrunc.f64.ll @@ -23,12 +23,12 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { ; CI: v_trunc_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 +; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 ; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01 ; SI: s_lshr_b64 +; SI: cmp_lt_i32 ; SI: s_not_b64 ; SI: s_and_b64 -; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI: cmp_lt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 ; SI: cmp_gt_i32 diff --git a/llvm/test/CodeGen/R600/imm.ll b/llvm/test/CodeGen/R600/imm.ll index 6e4fa3c..8a24477 100644 --- a/llvm/test/CodeGen/R600/imm.ll +++ b/llvm/test/CodeGen/R600/imm.ll @@ -305,7 +305,7 @@ define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) { ; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 0.0 store double %y, double addrspace(1)* %out @@ -315,7 +315,7 @@ define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 0.5 store double %y, double addrspace(1)* %out @@ -325,7 +325,7 @@ define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, -0.5 store double %y, double addrspace(1)* %out @@ -335,7 +335,7 @@ define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 1.0 store double %y, double addrspace(1)* %out @@ -345,7 +345,7 @@ define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: 
buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, -1.0 store double %y, double addrspace(1)* %out @@ -355,7 +355,7 @@ define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 2.0 store double %y, double addrspace(1)* %out @@ -365,7 +365,7 @@ define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, -2.0 store double %y, double addrspace(1)* %out @@ -375,7 +375,7 @@ define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 4.0 store double %y, double addrspace(1)* %out @@ -385,7 +385,7 @@ define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, -4.0 store double %y, double addrspace(1)* %out @@ -396,7 +396,7 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_1_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 0x0000000000000001 store double %y, double addrspace(1)* %out @@ -406,7 +406,7 @@ define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_2_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 0x0000000000000002 store double %y, double addrspace(1)* %out @@ -416,7 +416,7 @@ define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_16_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]] -; CHECK-NEXT: 
buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 0x0000000000000010 store double %y, double addrspace(1)* %out @@ -426,7 +426,7 @@ define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 0xffffffffffffffff store double %y, double addrspace(1)* %out @@ -436,7 +436,7 @@ define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 0xfffffffffffffffe store double %y, double addrspace(1)* %out @@ -446,7 +446,7 @@ define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 0xfffffffffffffff0 store double %y, double addrspace(1)* %out @@ -456,7 +456,7 @@ define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_63_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 0x000000000000003F store double %y, double addrspace(1)* %out @@ -466,7 +466,7 @@ define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) { ; CHECK-LABEL: {{^}}add_inline_imm_64_f64 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]] -; CHECK-NEXT: buffer_store_dwordx2 [[REG]] +; CHECK: buffer_store_dwordx2 [[REG]] define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) { %y = fadd double %x, 0x0000000000000040 store double %y, double addrspace(1)* %out diff --git a/llvm/test/CodeGen/R600/llvm.memcpy.ll b/llvm/test/CodeGen/R600/llvm.memcpy.ll index d6f5f62..e491732 100644 --- a/llvm/test/CodeGen/R600/llvm.memcpy.ll +++ b/llvm/test/CodeGen/R600/llvm.memcpy.ll @@ -7,39 +7,23 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1: ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 - ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 + ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 
-; SI: ds_write_b8 - ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 -; SI: ds_write_b8 ; SI: ds_read_u8 ; SI: ds_read_u8 - ; SI: ds_read_u8 ; SI: ds_read_u8 ; SI: ds_read_u8 @@ -66,6 +50,14 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: ds_write_b8 + +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: ds_write_b8 @@ -76,6 +68,14 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace ; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: ds_write_b8 + +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 +; SI: ds_write_b8 ; SI: ds_write_b8 ; SI: s_endpgm diff --git a/llvm/test/CodeGen/R600/llvm.round.f64.ll b/llvm/test/CodeGen/R600/llvm.round.f64.ll index 404cb0f..f21c2ae 100644 --- a/llvm/test/CodeGen/R600/llvm.round.f64.ll +++ b/llvm/test/CodeGen/R600/llvm.round.f64.ll @@ -21,9 +21,9 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 { ; SI: v_cmp_eq_i32 ; SI: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff +; SI: v_cmp_lt_i32_e64 ; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]] -; SI: v_cmp_lt_i32_e64 ; SI: v_cmp_gt_i32_e64 diff --git a/llvm/test/CodeGen/R600/llvm.round.ll b/llvm/test/CodeGen/R600/llvm.round.ll index 109f4c7..6c1fea3 100644 --- a/llvm/test/CodeGen/R600/llvm.round.ll +++ b/llvm/test/CodeGen/R600/llvm.round.ll @@ -3,11 +3,11 @@ ; FUNC-LABEL: {{^}}round_f32: ; SI-DAG: s_load_dword [[SX:s[0-9]+]] -; SI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]] ; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff -; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]] ; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]] ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]] +; SI: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]] +; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]] ; SI: v_cmp_ge_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SUB]]|, 0.5 ; SI: v_cndmask_b32_e64 [[SEL:v[0-9]+]], 0, [[VX]], [[CMP]] ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]] diff --git a/llvm/test/CodeGen/R600/local-atomics.ll b/llvm/test/CodeGen/R600/local-atomics.ll index 16d3173..78d747d 100644 --- a/llvm/test/CodeGen/R600/local-atomics.ll +++ b/llvm/test/CodeGen/R600/local-atomics.ll @@ -4,8 +4,8 @@ ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32: ; EG: LDS_WRXCHG_RET * -; SI: s_load_dword [[SPTR:s[0-9]+]], ; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; SI: s_load_dword [[SPTR:s[0-9]+]], ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] ; SI: buffer_store_dword [[RESULT]], @@ -30,8 +30,8 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac ; XXX - Is it really necessary to load 4 into VGPR? 
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32: ; EG: LDS_ADD_RET * -; SI: s_load_dword [[SPTR:s[0-9]+]], ; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; SI: s_load_dword [[SPTR:s[0-9]+]], ; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0] ; SI: buffer_store_dword [[RESULT]], diff --git a/llvm/test/CodeGen/R600/local-atomics64.ll b/llvm/test/CodeGen/R600/local-atomics64.ll index ce6ddbd..cc9d3a7 100644 --- a/llvm/test/CodeGen/R600/local-atomics64.ll +++ b/llvm/test/CodeGen/R600/local-atomics64.ll @@ -29,9 +29,9 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset: +; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 +; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 -; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 ; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0] ; SI: buffer_store_dwordx2 [[RESULT]], @@ -274,9 +274,9 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind { ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset: ; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 -; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 ; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 +; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0] ; SI: s_endpgm define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { diff --git a/llvm/test/CodeGen/R600/local-memory-two-objects.ll b/llvm/test/CodeGen/R600/local-memory-two-objects.ll index 5c77ad5..86edfde 100644 --- a/llvm/test/CodeGen/R600/local-memory-two-objects.ll +++ b/llvm/test/CodeGen/R600/local-memory-two-objects.ll @@ -31,8 +31,8 @@ ; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] ; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}} ; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] [M0] -; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 [M0] -; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] [M0] +; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] [M0] +; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] offset:16 [M0] define void @local_memory_two_objects(i32 addrspace(1)* %out) { entry: diff --git a/llvm/test/CodeGen/R600/or.ll b/llvm/test/CodeGen/R600/or.ll index c651049..78879a8 100644 --- a/llvm/test/CodeGen/R600/or.ll +++ b/llvm/test/CodeGen/R600/or.ll @@ -168,7 +168,7 @@ define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float add } ; FUNC-LABEL: {{^}}s_or_i1: -; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], vcc +; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}] define void @s_or_i1(i1 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { %cmp0 = icmp eq i32 %a, %b %cmp1 = icmp eq i32 %c, %d diff --git a/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll index b2f4a9f..f6dcb38 100644 --- a/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll +++ b/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll @@ -51,8 +51,8 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac ; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4 -; CI: buffer_store_dword ; CI: ds_read_b32 
{{v[0-9]+}}, {{v[0-9]+}} offset:8 +; CI: buffer_store_dword define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 diff --git a/llvm/test/CodeGen/R600/smrd.ll b/llvm/test/CodeGen/R600/smrd.ll index a66ad02..de60554 100644 --- a/llvm/test/CodeGen/R600/smrd.ll +++ b/llvm/test/CodeGen/R600/smrd.ll @@ -37,10 +37,12 @@ entry: ; SMRD load with a 64-bit offset ; CHECK-LABEL: {{^}}smrd3: -; CHECK-DAG: s_mov_b32 s[[SHI:[0-9]+]], 4 -; CHECK-DAG: s_mov_b32 s[[SLO:[0-9]+]], 0 ; -; FIXME: We don't need to copy these values to VGPRs -; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] +; FIXME: There are too many copies here because we don't fold immediates +; through REG_SEQUENCE +; CHECK: s_mov_b32 s[[SLO:[0-9]+]], 0 ; +; CHECK: s_mov_b32 s[[SHI:[0-9]+]], 4 +; CHECK: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]] +; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]] ; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]] ; FIXME: We should be able to use s_load_dword here ; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 diff --git a/llvm/test/CodeGen/R600/trunc.ll b/llvm/test/CodeGen/R600/trunc.ll index a6f902f..eb7e4ed 100644 --- a/llvm/test/CodeGen/R600/trunc.ll +++ b/llvm/test/CodeGen/R600/trunc.ll @@ -34,6 +34,8 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) { ; SI: s_lshl_b64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2 ; SI: s_add_u32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]], ; SI: s_addc_u32 +; SI: v_mov_b32_e32 +; SI: v_mov_b32_e32 ; SI: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]] ; SI: buffer_store_dword v[[LO_VREG]], define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) { diff --git a/llvm/test/CodeGen/R600/udivrem.ll b/llvm/test/CodeGen/R600/udivrem.ll index b439d7a..b3837f2 100644 --- a/llvm/test/CodeGen/R600/udivrem.ll +++ b/llvm/test/CodeGen/R600/udivrem.ll @@ -118,7 +118,7 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]] ; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]] +; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}} ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]] @@ -141,7 +141,7 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) { ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]] ; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]] +; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}} ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]] @@ -268,7 +268,7 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]] ; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]] +; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}} ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: 
v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]] @@ -291,7 +291,7 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]] ; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]] +; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}} ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]] @@ -314,7 +314,7 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]] ; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]] +; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}} ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]] @@ -335,20 +335,6 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3 ; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]] ; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]] ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_mul_hi_u32 [[FOURTH_Quotient:v[0-9]+]] -; SI-DAG: v_mul_lo_i32 [[FOURTH_Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_sub_i32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]] -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 [[FOURTH_Tmp1:v[0-9]+]] -; SI-DAG: v_add_i32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]] -; SI-DAG: v_subrev_i32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]], -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_add_i32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]], -; SI-DAG: v_subrev_i32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]], -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 ; SI: s_endpgm define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { %result0 = udiv <4 x i32> %x, %y diff --git a/llvm/test/CodeGen/R600/valu-i1.ll b/llvm/test/CodeGen/R600/valu-i1.ll index a402717..5a3c2ec 100644 --- a/llvm/test/CodeGen/R600/valu-i1.ll +++ b/llvm/test/CodeGen/R600/valu-i1.ll @@ -81,7 +81,6 @@ exit: ; SI: buffer_store_dword ; SI: v_cmp_eq_i32_e32 vcc, ; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]] -; SI: v_add_i32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} ; SI: s_andn2_b64 exec, exec, [[OR_SREG]] ; SI: s_cbranch_execnz BB2_3 @@ -124,8 +123,8 @@ exit: ; Clear exec bits for workitems that load -1s ; SI: BB3_3: -; SI: buffer_load_dword [[A:v[0-9]+]] ; SI: buffer_load_dword [[B:v[0-9]+]] +; SI: buffer_load_dword [[A:v[0-9]+]] ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], [[A]], -1 ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_1:s\[[0-9]+:[0-9]+\]]], [[B]], -1 ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]] diff --git a/llvm/test/CodeGen/R600/wait.ll b/llvm/test/CodeGen/R600/wait.ll index 93cfdd4..43561aa 100644 --- a/llvm/test/CodeGen/R600/wait.ll +++ b/llvm/test/CodeGen/R600/wait.ll @@ -4,9 +4,8 @@ ; CHECK-LABEL: {{^}}main: ; CHECK: s_load_dwordx4 ; CHECK: s_load_dwordx4 -; CHECK: s_waitcnt lgkmcnt(0){{$}} -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK: s_waitcnt expcnt(0) lgkmcnt(0){{$}} +; CHECK: s_waitcnt 
vmcnt(0) lgkmcnt(0){{$}} +; CHECK: s_endpgm define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 { main_body: %tmp = getelementptr <16 x i8> addrspace(2)* %arg3, i32 0 diff --git a/llvm/test/CodeGen/R600/xor.ll b/llvm/test/CodeGen/R600/xor.ll index b43ff40..1526e28 100644 --- a/llvm/test/CodeGen/R600/xor.ll +++ b/llvm/test/CodeGen/R600/xor.ll @@ -58,8 +58,8 @@ define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float ad } ; FUNC-LABEL: {{^}}v_xor_i1: -; SI: buffer_load_ubyte [[A:v[0-9]+]] ; SI: buffer_load_ubyte [[B:v[0-9]+]] +; SI: buffer_load_ubyte [[A:v[0-9]+]] ; SI: v_xor_b32_e32 [[XOR:v[0-9]+]], [[A]], [[B]] ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]] ; SI: buffer_store_byte [[RESULT]] diff --git a/llvm/test/CodeGen/R600/zero_extend.ll b/llvm/test/CodeGen/R600/zero_extend.ll index 4492385..1f4dd43 100644 --- a/llvm/test/CodeGen/R600/zero_extend.ll +++ b/llvm/test/CodeGen/R600/zero_extend.ll @@ -30,9 +30,9 @@ entry: } ; SI-CHECK-LABEL: {{^}}zext_i1_to_i64: +; SI-CHECK: s_mov_b32 s{{[0-9]+}}, 0 ; SI-CHECK: v_cmp_eq_i32 ; SI-CHECK: v_cndmask_b32 -; SI-CHECK: s_mov_b32 s{{[0-9]+}}, 0 define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %cmp = icmp eq i32 %a, %b %ext = zext i1 %cmp to i64 -- 2.7.4