setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
+ setOperationAction(ISD::LOAD, MVT::i64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
+
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
// We need to custom lower loads/stores from private memory
setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
+ EVT VT = Op.getValueType();
+
+ // v2i32 local loads are legal: they select to DS_READ_B64.
+ if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ VT.isVector() && VT.getVectorNumElements() == 2 &&
+ VT.getVectorElementType() == MVT::i32)
+ return SDValue();
+
if (Op.getValueType().isVector() &&
(Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
defm : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
defm : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
defm : DSReadPat <DS_READ_B32, i32, local_load>;
-defm : DSReadPat <DS_READ_B64, i64, local_load>;
+defm : DSReadPat <DS_READ_B64, v2i32, local_load>;
multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> {
def : Pat <
az_extloadi16_global, az_extloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32,
global_load, constant_load>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,
- global_load, constant_load>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,
- az_extloadi32_global, az_extloadi32_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32,
global_load, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32,
}
; SI-LABEL: @trunc_i64_add_to_i32
-; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]],
-; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]],
-; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]]
+; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
+; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
+; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
; SI-NOT: ADDC
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B32
-; SI-CHECK: DS_READ_B32
+; SI-CHECK: DS_READ_B64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
%0 = load <2 x float> addrspace(3)* %in
}
; SI-LABEL: @trunc_i64_or_to_i32
-; SI: S_LOAD_DWORD [[SREG0:s[0-9]+]],
-; SI: S_LOAD_DWORD [[SREG1:s[0-9]+]],
-; SI: S_OR_B32 [[SRESULT:s[0-9]+]], [[SREG1]], [[SREG0]]
+; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
+; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
+; SI: S_OR_B32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: BUFFER_STORE_DWORD [[VRESULT]],
define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {