def int_amdgcn_buffer_load_format : Intrinsic <
[llvm_v4f32_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // offset(imm)
llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // voffset(VGPR)
+ llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[IntrReadMem]>;
[],
[llvm_anyfloat_ty, // vdata(VGPR) -- can currently only select v4f32
llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // offset(imm)
llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // voffset(VGPR)
+ llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[]>;
SDValue &TFE) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &GLC) const;
+ void SelectMUBUFConstant(SDValue Constant,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
+ SDValue &ImmOffset, SDValue &VOffset) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
+void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Constant);
+ uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
+ uint32_t Overflow = 0;
+
+ if (Imm >= 4096) {
+ if (Imm <= 4095 + 64) {
+ // Use an SOffset inline constant for 1..64
+ Overflow = Imm - 4095;
+ Imm = 4095;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits set into SOffset, so that a larger
+ // range of values can be covered using s_movk_i32
+ uint32_t High = (Imm + 1) & ~4095;
+ uint32_t Low = (Imm + 1) & 4095;
+ Imm = Low;
+ Overflow = High - 1;
+ }
+ }
+
+ ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
+
+ if (Overflow <= 64)
+ SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
+ else
+ SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
+ 0);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
+ SDValue &SOffset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Offset);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return false;
+
+ SelectMUBUFConstant(Offset, SOffset, ImmOffset);
+
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
+ SDValue &SOffset,
+ SDValue &ImmOffset,
+ SDValue &VOffset) const {
+ SDLoc DL(Offset);
+
+ // Don't generate an unnecessary voffset for constant offsets.
+ if (isa<ConstantSDNode>(Offset))
+ return false;
+
+ if (CurDAG->isBaseWithConstantOffset(Offset)) {
+ SDValue N0 = Offset.getOperand(0);
+ SDValue N1 = Offset.getOperand(1);
+ SelectMUBUFConstant(N1, SOffset, ImmOffset);
+ VOffset = N0;
+ } else {
+ SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ VOffset = Offset;
+ }
+
+ return true;
+}
+
///
/// \param EncodedOffset This is the immediate value that will be encoded
/// directly into the instruction. On SI/CI the \p EncodedOffset
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
+def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
def : Pat<
- (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, 0, 0,
+ (int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
+ (MUBUFIntrinsicOffset i32:$soffset,
+ i16:$offset),
imm:$glc, imm:$slc),
(BUFFER_LOAD_FORMAT_XYZW_OFFSET $rsrc, $soffset, (as_i16imm $offset),
(as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, i32:$vindex, 0,
+ (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicOffset i32:$soffset,
+ i16:$offset),
imm:$glc, imm:$slc),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $vindex, $rsrc, $soffset, (as_i16imm $offset),
(as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, 0, i32:$voffset,
+ (int_amdgcn_buffer_load_format v4i32:$rsrc, 0,
+ (MUBUFIntrinsicVOffset i32:$soffset,
+ i16:$offset,
+ i32:$voffset),
imm:$glc, imm:$slc),
(BUFFER_LOAD_FORMAT_XYZW_OFFEN $voffset, $rsrc, $soffset, (as_i16imm $offset),
(as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$soffset, imm:$offset, i32:$vindex, i32:$voffset,
+ (int_amdgcn_buffer_load_format v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset,
+ i16:$offset,
+ i32:$voffset),
imm:$glc, imm:$slc),
(BUFFER_LOAD_FORMAT_XYZW_BOTHEN
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
>;
def : Pat<
- (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc,
- i32:$soffset, imm:$offset, 0, 0,
+ (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicOffset i32:$soffset,
+ i16:$offset),
imm:$glc, imm:$slc),
(BUFFER_STORE_FORMAT_XYZW_OFFSET $vdata, $rsrc, $soffset, (as_i16imm $offset),
(as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc,
- i32:$soffset, imm:$offset, i32:$vindex, 0,
- imm:$glc, imm:$slc),
+ (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicOffset i32:$soffset,
+ i16:$offset),
+ imm:$glc, imm:$slc),
(BUFFER_STORE_FORMAT_XYZW_IDXEN $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc,
- i32:$soffset, imm:$offset, 0, i32:$voffset,
+ (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicVOffset i32:$soffset,
+ i16:$offset,
+ i32:$voffset),
imm:$glc, imm:$slc),
(BUFFER_STORE_FORMAT_XYZW_OFFEN $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), 0)
>;
def : Pat<
- (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$soffset,
- imm:$offset, i32:$vindex, i32:$voffset,
+ (int_amdgcn_buffer_store_format v4f32:$vdata, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset,
+ i16:$offset,
+ i32:$voffset),
imm:$glc, imm:$slc),
(BUFFER_STORE_FORMAT_XYZW_BOTHEN
$vdata,
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: {{^}}buffer_load:
-;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], s4
-;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], s4 glc
-;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], s4 slc
+;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0
+;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], 0 glc
+;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], 0 slc
;CHECK: s_waitcnt
-define {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg, i32 inreg) #0 {
+define {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) #0 {
main_body:
- %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 0, i1 0)
- %data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 1, i1 0)
- %data_slc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 0, i1 1)
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
+ %data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
+ %data_slc = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
%r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
%r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
%r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
}
;CHECK-LABEL: {{^}}buffer_load_immoffs:
-;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], s4 offset:42
+;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0 offset:42
;CHECK: s_waitcnt
-define <4 x float> @buffer_load_immoffs(<4 x i32> inreg, i32 inreg) #0 {
+define <4 x float> @buffer_load_immoffs(<4 x i32> inreg) #0 {
main_body:
- %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 42, i32 0, i32 0, i1 0, i1 0)
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
+ ret <4 x float> %data
+}
+
+;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
+;CHECK-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 61 offset:4095
+;CHECK-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7fff
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS1]] offset:4093
+;CHECK: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS2]] offset:1
+;CHECK: s_waitcnt
+define <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) #0 {
+main_body:
+ %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4156, i1 0, i1 0)
+ %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 36860, i1 0, i1 0)
+ %d.2 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 36864, i1 0, i1 0)
+ %d.3 = fadd <4 x float> %d.0, %d.1
+ %data = fadd <4 x float> %d.2, %d.3
+ ret <4 x float> %data
+}
+
+;CHECK-LABEL: {{^}}buffer_load_immoffs_reuse:
+;CHECK: s_movk_i32 [[OFS:s[0-9]+]], 0xfff
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:65
+;CHECK-NOT: s_mov
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:81
+;CHECK: s_waitcnt
+define <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) #0 {
+main_body:
+ %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4160, i1 0, i1 0)
+ %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 4176, i1 0, i1 0)
+ %data = fadd <4 x float> %d.0, %d.1
ret <4 x float> %data
}
;CHECK: s_waitcnt
define <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) #0 {
main_body:
- %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %1, i32 0, i1 0, i1 0)
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 0, i1 0, i1 0)
ret <4 x float> %data
}
;CHECK: s_waitcnt
define <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) #0 {
main_body:
- %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 0, i32 %1, i1 0, i1 0)
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 %1, i1 0, i1 0)
+ ret <4 x float> %data
+}
+
+;CHECK-LABEL: {{^}}buffer_load_ofs_imm:
+;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen offset:58
+;CHECK: s_waitcnt
+define <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) #0 {
+main_body:
+ %ofs = add i32 %1, 58
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0)
ret <4 x float> %data
}
;CHECK: s_waitcnt
define <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) #0 {
main_body:
- %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %1, i32 %2, i1 0, i1 0)
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %1, i32 %2, i1 0, i1 0)
ret <4 x float> %data
}
;CHECK: s_waitcnt
define <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) #0 {
main_body:
- %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %2, i32 %1, i1 0, i1 0)
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %2, i32 %1, i1 0, i1 0)
ret <4 x float> %data
}
-declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i32, i32, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i1, i1) #1
attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readonly }
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
;CHECK-LABEL: {{^}}buffer_store:
-;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], s4
-;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], s4 glc
-;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], s4 slc
-define void @buffer_store(<4 x i32> inreg, i32 inreg, <4 x float>, <4 x float>, <4 x float>) #0 {
+;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0
+;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], 0 glc
+;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], 0 slc
+define void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) #0 {
main_body:
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 0, i1 0)
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %3, <4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 1, i1 0)
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %4, <4 x i32> %0, i32 %1, i32 0, i32 0, i32 0, i1 0, i1 1)
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
ret void
}
;CHECK-LABEL: {{^}}buffer_store_immoffs:
-;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], s4 offset:42
-define void @buffer_store_immoffs(<4 x i32> inreg, i32 inreg, <4 x float>) #0 {
+;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 offset:42
+define void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) #0 {
main_body:
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 %1, i32 42, i32 0, i32 0, i1 0, i1 0)
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
ret void
}
;CHECK-LABEL: {{^}}buffer_store_idx:
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
-define void @buffer_store_idx(<4 x i32> inreg, i32 inreg, <4 x float>, i32) #0 {
+define void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) #0 {
main_body:
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 %3, i32 0, i1 0, i1 0)
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
ret void
}
;CHECK-LABEL: {{^}}buffer_store_ofs:
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 offen
-define void @buffer_store_ofs(<4 x i32> inreg, i32 inreg, <4 x float>, i32) #0 {
+define void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) #0 {
main_body:
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 0, i32 %3, i1 0, i1 0)
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 %2, i1 0, i1 0)
ret void
}
;CHECK-LABEL: {{^}}buffer_store_both:
;CHECK: buffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 idxen offen
-define void @buffer_store_both(<4 x i32> inreg, i32 inreg, <4 x float>, i32, i32) #0 {
+define void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) #0 {
main_body:
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 %3, i32 %4, i1 0, i1 0)
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 %3, i1 0, i1 0)
ret void
}
;CHECK-LABEL: {{^}}buffer_store_both_reversed:
;CHECK: v_mov_b32_e32 v6, v4
;CHECK: buffer_store_format_xyzw v[0:3], v[5:6], s[0:3], 0 idxen offen
-define void @buffer_store_both_reversed(<4 x i32> inreg, i32 inreg, <4 x float>, i32, i32) #0 {
+define void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) #0 {
main_body:
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 %4, i32 %3, i1 0, i1 0)
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %3, i32 %2, i1 0, i1 0)
ret void
}
;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen
-define void @buffer_store_wait(<4 x i32> inreg, i32 inreg, <4 x float>, i32, i32, i32) #0 {
+define void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) #0 {
main_body:
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i32 %3, i32 0, i1 0, i1 0)
- %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 0, i32 0, i32 %4, i32 0, i1 0, i1 0)
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %data, <4 x i32> %0, i32 0, i32 0, i32 %5, i32 0, i1 0, i1 0)
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
+ %data = call <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32> %0, i32 %3, i32 0, i1 0, i1 0)
+ call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %data, <4 x i32> %0, i32 %4, i32 0, i1 0, i1 0)
ret void
}
-declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i32, i32, i1, i1) #2
+declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.buffer.load.format(<4 x i32>, i32, i32, i1, i1) #2
attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind }