setOperationAction(ISD::LOAD, MVT::i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
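+ // i64 loads are promoted to v2i32 (above), and the custom hook now expands
+ // unaligned accesses, so v2i32 loads are marked Custom as well.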
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i64, Promote);
AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
"Custom lowering for non-i32 vectors hasn't been implemented.");
- unsigned NumElements = MemVT.getVectorNumElements();
- assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
- switch (Load->getAddressSpace()) {
+ unsigned AS = Load->getAddressSpace();
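+ // If the target cannot perform this load at the given alignment in this
+ // address space, expand it now; expandUnalignedLoad returns the reassembled
+ // value and the new chain, which are merged below.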
+ if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ AS, Load->getAlignment())) {
+ SDValue Ops[2];
+ std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
+ return DAG.getMergeValues(Ops, DL);
+ }
+
+ unsigned NumElements = MemVT.getVectorNumElements();
+ switch (AS) {
case AMDGPUAS::CONSTANT_ADDRESS:
if (isMemOpUniform(Load))
return SDValue();
llvm_unreachable("unsupported private_element_size");
}
}
- case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::LOCAL_ADDRESS: {
+ if (NumElements > 2)
+ return SplitVectorLoad(Op, DAG);
+
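+ // A v2 load that reaches this point is sufficiently aligned; returning a
+ // null SDValue keeps the original load, which can be selected directly.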
+ if (NumElements == 2)
+ return SDValue();
+
// If properly aligned, splitting might let us use ds_read_b64.
return SplitVectorLoad(Op, DAG);
+ }
default:
return SDValue();
}
Store->getBasePtr(), MVT::i1, Store->getMemOperand());
}
- assert(Store->getValue().getValueType().getScalarType() == MVT::i32);
+ assert(VT.isVector() &&
+ Store->getValue().getValueType().getScalarType() == MVT::i32);
+
+ unsigned AS = Store->getAddressSpace();
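+ // Same check for stores: unsupported unaligned stores are expanded into a
+ // supported sequence before the per-address-space handling below.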
+ if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ AS, Store->getAlignment())) {
+ return expandUnalignedStore(Store, DAG);
+ }
unsigned NumElements = VT.getVectorNumElements();
- switch (Store->getAddressSpace()) {
+ switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::FLAT_ADDRESS:
if (NumElements > 4)
llvm_unreachable("unsupported private_element_size");
}
}
- case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::LOCAL_ADDRESS: {
+ if (NumElements > 2)
+ return SplitVectorStore(Op, DAG);
+
+ if (NumElements == 2)
+ return Op;
+
// If properly aligned, splitting might let us use ds_write_b64.
return SplitVectorStore(Op, DAG);
+ }
default:
llvm_unreachable("unhandled address space");
}
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}unaligned_load_store_i16_local:
ret void
}
+; SI-LABEL: {{^}}align2_load_store_i32_global:
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_store_short
+; SI: buffer_store_short
+define void @align2_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
+ %v = load i32, i32 addrspace(1)* %p, align 2
+ store i32 %v, i32 addrspace(1)* %r, align 2
+ ret void
+}
+
+; SI-LABEL: {{^}}align2_load_store_i32_local:
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_write_b16
+; SI: ds_write_b16
+define void @align2_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
+ %v = load i32, i32 addrspace(3)* %p, align 2
+ store i32 %v, i32 addrspace(3)* %r, align 2
+ ret void
+}
+
+; FIXME: Unnecessary packing and unpacking of bytes.
; SI-LABEL: {{^}}unaligned_load_store_i64_local:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
+
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
ret void
}
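+; v2i32 loads and stores are now custom lowered, so the unaligned local case
+; is expanded into byte-sized ds operations, mirroring the i64 test above.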
+; SI-LABEL: {{^}}unaligned_load_store_v2i32_local:
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
+; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
+; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
+; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
+; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
+; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+
+; SI: ds_write_b8
+; XSI-NOT: v_or_b32
+; XSI-NOT: v_lshl
+; SI: ds_write_b8
+; SI: s_endpgm
+define void @unaligned_load_store_v2i32_local(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
+ %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
+ store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
+ ret void
+}
+
; SI-LABEL: {{^}}unaligned_load_store_i64_global:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
+
+; XSI-NOT: v_or_
+; XSI-NOT: v_lshl
+
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte