-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; be 32-bits.
; SI-LABEL: @test_private_array_ptr_calc:
+
; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]]
+
+; SI-ALLOCA: V_MOVRELD_B32_e32 {{v[0-9]+}}, [[PTRREG]]
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector. It currently fails because it does not know how
; to interpret:
; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
-; SI: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]]
+
+; SI-PROMOTE: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%alloca = alloca [4 x i32], i32 4, align 16
%tid = call i32 @llvm.SI.tid() readnone
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
+
declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
; SI-LABEL: @private_access_f64_alloca:
-; SI: DS_WRITE_B64
-; SI: DS_READ_B64
+
+; SI-ALLOCA: V_MOVRELD_B32_e32
+; SI-ALLOCA: V_MOVRELD_B32_e32
+; SI-ALLOCA: V_MOVRELS_B32_e32
+; SI-ALLOCA: V_MOVRELS_B32_e32
+
+; SI-PROMOTE: DS_WRITE_B64
+; SI-PROMOTE: DS_READ_B64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load double addrspace(1)* %in, align 8
%array = alloca double, i32 16, align 8
}
; SI-LABEL: @private_access_v2f64_alloca:
-; SI: DS_WRITE_B64
-; SI: DS_WRITE_B64
-; SI: DS_READ_B64
-; SI: DS_READ_B64
+
+; SI-ALLOCA: V_MOVRELD_B32_e32
+; SI-ALLOCA: V_MOVRELD_B32_e32
+; SI-ALLOCA: V_MOVRELS_B32_e32
+; SI-ALLOCA: V_MOVRELS_B32_e32
+
+; SI-PROMOTE: DS_WRITE_B64
+; SI-PROMOTE: DS_WRITE_B64
+; SI-PROMOTE: DS_READ_B64
+; SI-PROMOTE: DS_READ_B64
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load <2 x double> addrspace(1)* %in, align 16
%array = alloca <2 x double>, i32 16, align 16
}
; SI-LABEL: @private_access_i64_alloca:
-; SI: DS_WRITE_B64
-; SI: DS_READ_B64
+
+; SI-ALLOCA: V_MOVRELD_B32_e32
+; SI-ALLOCA: V_MOVRELD_B32_e32
+; SI-ALLOCA: V_MOVRELS_B32_e32
+; SI-ALLOCA: V_MOVRELS_B32_e32
+
+; SI-PROMOTE: DS_WRITE_B64
+; SI-PROMOTE: DS_READ_B64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load i64 addrspace(1)* %in, align 8
%array = alloca i64, i32 16, align 8
}
; SI-LABEL: @private_access_v2i64_alloca:
-; SI: DS_WRITE_B64
-; SI: DS_WRITE_B64
-; SI: DS_READ_B64
-; SI: DS_READ_B64
+
+; SI-ALLOCA: V_MOVRELD_B32_e32
+; SI-ALLOCA: V_MOVRELD_B32_e32
+; SI-ALLOCA: V_MOVRELD_B32_e32
+; SI-ALLOCA: V_MOVRELD_B32_e32
+; SI-ALLOCA: V_MOVRELS_B32_e32
+; SI-ALLOCA: V_MOVRELS_B32_e32
+; SI-ALLOCA: V_MOVRELS_B32_e32
+; SI-ALLOCA: V_MOVRELS_B32_e32
+
+; SI-PROMOTE: DS_WRITE_B64
+; SI-PROMOTE: DS_WRITE_B64
+; SI-PROMOTE: DS_READ_B64
+; SI-PROMOTE: DS_READ_B64
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load <2 x i64> addrspace(1)* %in, align 16
%array = alloca <2 x i64>, i32 16, align 16
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: @vector_read
; EG: MOV
; This test should be optimize to:
; store i32 0, i32 addrspace(1)* %out
; FUNC-LABEL: @bitcast_gep
-; CHECK: STORE_RAW
+; EG: STORE_RAW
define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
%0 = alloca [4 x i32]