Add more tests for buffer intrinsics with large voffsets.
ret <4 x float> %data
}
+; Raw buffer load with a constant offset operand of 4092. The checks expect the
+; whole constant to be folded into the immediate field ("off ... offset:4092"),
+; with no VGPR offset materialized on any tested target.
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_12bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_12bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 4092, i32 0, i32 0)
+ ret <4 x float> %data
+}
+
+; Raw buffer load with a constant offset operand of 8188 (0x1000 + 4092). The
+; checks expect the constant to be split: 0x1000 is moved into a VGPR used with
+; "offen", and the remaining 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_13bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x1000
+; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_13bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_13bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 8188, i32 0, i32 0)
+ ret <4 x float> %data
+}
+
+; Raw buffer load with a constant offset operand of 65532 (0xf000 + 4092). The
+; checks expect the constant to be split: 0xf000 is moved into a VGPR used with
+; "offen", and the remaining 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_16bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xf000
+; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_16bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v0, 0xf000
+; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_16bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v0, 0xf000
+; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 65532, i32 0, i32 0)
+ ret <4 x float> %data
+}
+
+; Raw buffer load with a constant offset operand of 8388604 (0x7ff000 + 4092).
+; The checks expect the constant to be split: 0x7ff000 is moved into a VGPR used
+; with "offen", and the remaining 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_23bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_23bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_23bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 8388604, i32 0, i32 0)
+ ret <4 x float> %data
+}
+
+; Raw buffer load with a constant offset operand of 16777212 (0xfff000 + 4092).
+; The checks expect the constant to be split: 0xfff000 is moved into a VGPR used
+; with "offen", and the remaining 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_24bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
+; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_24bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
+; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_24bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v0, 0xfff000
+; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 16777212, i32 0, i32 0)
+ ret <4 x float> %data
+}
+
+
define amdgpu_ps float @buffer_load_x1(<4 x i32> inreg %rsrc, i32 %ofs) {
; PREGFX10-LABEL: buffer_load_x1:
; PREGFX10: ; %bb.0: ; %main_body
ret <4 x float> %data
}
-
define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, ptr addrspace(3) %lds) {
; GFX10-LABEL: buffer_load_mmo:
; GFX10: ; %bb.0: ; %entry
ret <4 x float> %vdata.f
}
+; Raw tbuffer load with a constant offset operand of 4092. The checks expect the
+; whole constant to be folded into the immediate field ("off ... offset:4092"),
+; with no VGPR offset materialized on any tested target.
+; NOTE(review): this function calls the tbuffer load intrinsic but is named
+; buffer_load_*, unlike the tbuffer_load_voffset_large_* tests that follow it.
+; Consider renaming for consistency (the -LABEL lines would need to follow).
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_12bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_12bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 4092, i32 0, i32 63, i32 0)
+ ret <4 x float> %data
+}
+
+; Raw tbuffer load with a constant offset operand of 8188 (0x1000 + 4092). The
+; checks expect the constant to be split: 0x1000 is moved into a VGPR used with
+; "offen", and the remaining 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x1000
+; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000
+; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8188, i32 0, i32 63, i32 0)
+ ret <4 x float> %data
+}
+
+; Raw tbuffer load with a constant offset operand of 65532 (0xf000 + 4092). The
+; checks expect the constant to be split: 0xf000 is moved into a VGPR used with
+; "offen", and the remaining 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xf000
+; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v0, 0xf000
+; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v0, 0xf000
+; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 65532, i32 0, i32 63, i32 0)
+ ret <4 x float> %data
+}
+
+; Raw tbuffer load with a constant offset operand of 8388604 (0x7ff000 + 4092).
+; The checks expect the constant to be split: 0x7ff000 is moved into a VGPR used
+; with "offen", and the remaining 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v0, 0x7ff000
+; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8388604, i32 0, i32 63, i32 0)
+ ret <4 x float> %data
+}
+
+; Raw tbuffer load with a constant offset operand of 16777212 (0xfff000 + 4092).
+; The checks expect the constant to be split: 0xfff000 is moved into a VGPR used
+; with "offen", and the remaining 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
+; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v0, 0xfff000
+; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v0, 0xfff000
+; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 16777212, i32 0, i32 63, i32 0)
+ ret <4 x float> %data
+}
+
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
; PREGFX10-LABEL: tbuffer_load_immoffs_large:
; PREGFX10: ; %bb.0:
ret void
}
+; Raw tbuffer store with a constant offset operand of 4092. The checks expect the
+; whole constant to be folded into the immediate field ("off ... offset:4092"),
+; with no VGPR offset materialized on any tested target.
+; NOTE(review): named buffer_store_* although it calls the tbuffer store
+; intrinsic - confirm the name is intended.
+define amdgpu_ps void @buffer_store_voffset_large_12bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; PREGFX10-LABEL: buffer_store_voffset_large_12bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: tbuffer_store_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offset:4092
+; PREGFX10-NEXT: s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_12bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: tbuffer_store_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offset:4092
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_12bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 4092, i32 0, i32 63, i32 0)
+ ret void
+}
+
+; Raw tbuffer store with a constant offset operand of 8188 (0x1000 + 4092). The
+; checks expect the constant to be split: 0x1000 is moved into v4 (v[0:3] carry
+; the store data) and used with "offen"; 4092 stays in the immediate field.
+; NOTE(review): named buffer_store_* although it calls the tbuffer store
+; intrinsic - confirm the name is intended.
+define amdgpu_ps void @buffer_store_voffset_large_13bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; PREGFX10-LABEL: buffer_store_voffset_large_13bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v4, 0x1000
+; PREGFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT: s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_13bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v4, 0x1000
+; GFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_13bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v4, 0x1000
+; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 8188, i32 0, i32 63, i32 0)
+ ret void
+}
+
+; Raw tbuffer store with a constant offset operand of 65532 (0xf000 + 4092). The
+; checks expect the constant to be split: 0xf000 is moved into v4 (v[0:3] carry
+; the store data) and used with "offen"; 4092 stays in the immediate field.
+; NOTE(review): named buffer_store_* although it calls the tbuffer store
+; intrinsic - confirm the name is intended.
+define amdgpu_ps void @buffer_store_voffset_large_16bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; PREGFX10-LABEL: buffer_store_voffset_large_16bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v4, 0xf000
+; PREGFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT: s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_16bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v4, 0xf000
+; GFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_16bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v4, 0xf000
+; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 65532, i32 0, i32 63, i32 0)
+ ret void
+}
+
+; Raw tbuffer store with a constant offset operand of 8388604 (0x7ff000 + 4092).
+; The checks expect the constant to be split: 0x7ff000 is moved into v4 (v[0:3]
+; carry the store data) and used with "offen"; 4092 stays in the immediate field.
+; NOTE(review): named buffer_store_* although it calls the tbuffer store
+; intrinsic - confirm the name is intended.
+define amdgpu_ps void @buffer_store_voffset_large_23bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; PREGFX10-LABEL: buffer_store_voffset_large_23bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v4, 0x7ff000
+; PREGFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT: s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_23bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v4, 0x7ff000
+; GFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_23bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v4, 0x7ff000
+; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 8388604, i32 0, i32 63, i32 0)
+ ret void
+}
+
+; Raw tbuffer store with a constant offset operand of 16777212 (0xfff000 + 4092).
+; The checks expect the constant to be split: 0xfff000 is moved into v4 (v[0:3]
+; carry the store data) and used with "offen"; 4092 stays in the immediate field.
+; NOTE(review): named buffer_store_* although it calls the tbuffer store
+; intrinsic - confirm the name is intended.
+define amdgpu_ps void @buffer_store_voffset_large_24bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; PREGFX10-LABEL: buffer_store_voffset_large_24bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v4, 0xfff000
+; PREGFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
+; PREGFX10-NEXT: s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_24bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v4, 0xfff000
+; GFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_24bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v4, 0xfff000
+; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 16777212, i32 0, i32 63, i32 0)
+ ret void
+}
+
declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0
ret <4 x float> %data
}
+; Struct buffer format load with a constant index operand of 0 and a constant
+; offset operand of 4092. The checks expect the zero index in a VGPR ("idxen")
+; and the whole offset folded into the immediate field (offset:4092, no "offen").
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
+; GFX6-LABEL: buffer_load_voffset_large_12bit:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: v_mov_b32_e32 v0, 0
+; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8PLUS-LABEL: buffer_load_voffset_large_12bit:
+; GFX8PLUS: ; %bb.0: ; %main_body
+; GFX8PLUS-NEXT: v_mov_b32_e32 v0, 0
+; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
+; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
+; GFX8PLUS-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_12bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 0, i32 0)
+ ret <4 x float> %data
+}
+
+; Struct buffer format load with a constant index operand of 0 and a constant
+; offset operand of 8188 (0x1000 + 4092). The checks expect a VGPR pair v[0:1]
+; ("idxen offen"): v0 holds the zero index, v1 holds 0x1000, and the remaining
+; 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(<4 x i32> inreg) {
+; GFX6-LABEL: buffer_load_voffset_large_13bit:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s4, 0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x1000
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8PLUS-LABEL: buffer_load_voffset_large_13bit:
+; GFX8PLUS: ; %bb.0: ; %main_body
+; GFX8PLUS-NEXT: s_mov_b32 s4, 0
+; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0x1000
+; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
+; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
+; GFX8PLUS-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_13bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 8188, i32 0, i32 0)
+ ret <4 x float> %data
+}
+
+; Struct buffer format load with a constant index operand of 0 and a constant
+; offset operand of 65532 (0xf000 + 4092). The checks expect a VGPR pair v[0:1]
+; ("idxen offen"): v0 holds the zero index, v1 holds 0xf000, and the remaining
+; 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(<4 x i32> inreg) {
+; GFX6-LABEL: buffer_load_voffset_large_16bit:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s4, 0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xf000
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8PLUS-LABEL: buffer_load_voffset_large_16bit:
+; GFX8PLUS: ; %bb.0: ; %main_body
+; GFX8PLUS-NEXT: s_mov_b32 s4, 0
+; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0xf000
+; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
+; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
+; GFX8PLUS-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_16bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 65532, i32 0, i32 0)
+ ret <4 x float> %data
+}
+
+; Struct buffer format load with a constant index operand of 0 and a constant
+; offset operand of 8388604 (0x7ff000 + 4092). The checks expect a VGPR pair
+; v[0:1] ("idxen offen"): v0 holds the zero index, v1 holds 0x7ff000, and the
+; remaining 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(<4 x i32> inreg) {
+; GFX6-LABEL: buffer_load_voffset_large_23bit:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s4, 0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0x7ff000
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8PLUS-LABEL: buffer_load_voffset_large_23bit:
+; GFX8PLUS: ; %bb.0: ; %main_body
+; GFX8PLUS-NEXT: s_mov_b32 s4, 0
+; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0x7ff000
+; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
+; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
+; GFX8PLUS-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_23bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 8388604, i32 0, i32 0)
+ ret <4 x float> %data
+}
+
+; Struct buffer format load with a constant index operand of 0 and a constant
+; offset operand of 16777212 (0xfff000 + 4092). The checks expect a VGPR pair
+; v[0:1] ("idxen offen"): v0 holds the zero index, v1 holds 0xfff000, and the
+; remaining 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(<4 x i32> inreg) {
+; GFX6-LABEL: buffer_load_voffset_large_24bit:
+; GFX6: ; %bb.0: ; %main_body
+; GFX6-NEXT: s_mov_b32 s4, 0
+; GFX6-NEXT: v_mov_b32_e32 v1, 0xfff000
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX8PLUS-LABEL: buffer_load_voffset_large_24bit:
+; GFX8PLUS: ; %bb.0: ; %main_body
+; GFX8PLUS-NEXT: s_mov_b32 s4, 0
+; GFX8PLUS-NEXT: v_mov_b32_e32 v1, 0xfff000
+; GFX8PLUS-NEXT: v_mov_b32_e32 v0, s4
+; GFX8PLUS-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX8PLUS-NEXT: s_waitcnt vmcnt(0)
+; GFX8PLUS-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_24bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 16777212, i32 0, i32 0)
+ ret <4 x float> %data
+}
+
define amdgpu_ps <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) {
; GFX6-LABEL: buffer_load_idx:
; GFX6: ; %bb.0: ; %main_body
ret float %vdata.f
}
+; Struct tbuffer load with a constant index operand of 0 and a constant offset
+; operand of 4092. The checks expect the zero index in a VGPR ("idxen") and the
+; whole offset folded into the immediate field (offset:4092, no "offen").
+; NOTE(review): this function calls the tbuffer load intrinsic but is named
+; buffer_load_*, unlike the tbuffer_load_voffset_large_* tests that follow it.
+; Consider renaming for consistency (the -LABEL lines would need to follow).
+define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v0, 0
+; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: buffer_load_voffset_large_12bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: buffer_load_voffset_large_12bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 0, i32 63, i32 0)
+ ret <4 x float> %data
+}
+
+; Struct tbuffer load with a constant index operand of 0 and a constant offset
+; operand of 8188 (0x1000 + 4092). The checks expect a VGPR pair v[0:1]
+; ("idxen offen"): v0 holds the zero index, v1 holds 0x1000, and the remaining
+; 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: s_mov_b32 s4, 0
+; PREGFX10-NEXT: v_mov_b32_e32 v1, 0x1000
+; PREGFX10-NEXT: v_mov_b32_e32 v0, s4
+; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_mov_b32_e32 v1, 0x1000
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 8188, i32 0, i32 63, i32 0)
+ ret <4 x float> %data
+}
+
+; Struct tbuffer load with a constant index operand of 0 and a constant offset
+; operand of 65532 (0xf000 + 4092). The checks expect a VGPR pair v[0:1]
+; ("idxen offen"): v0 holds the zero index, v1 holds 0xf000, and the remaining
+; 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: s_mov_b32 s4, 0
+; PREGFX10-NEXT: v_mov_b32_e32 v1, 0xf000
+; PREGFX10-NEXT: v_mov_b32_e32 v0, s4
+; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_mov_b32_e32 v1, 0xf000
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 65532, i32 0, i32 63, i32 0)
+ ret <4 x float> %data
+}
+
+; Struct tbuffer load with a constant index operand of 0 and a constant offset
+; operand of 8388604 (0x7ff000 + 4092). The checks expect a VGPR pair v[0:1]
+; ("idxen offen"): v0 holds the zero index, v1 holds 0x7ff000, and the remaining
+; 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: s_mov_b32 s4, 0
+; PREGFX10-NEXT: v_mov_b32_e32 v1, 0x7ff000
+; PREGFX10-NEXT: v_mov_b32_e32 v0, s4
+; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_mov_b32_e32 v1, 0x7ff000
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 8388604, i32 0, i32 63, i32 0)
+ ret <4 x float> %data
+}
+
+; Struct tbuffer load with a constant index operand of 0 and a constant offset
+; operand of 16777212 (0xfff000 + 4092). The checks expect a VGPR pair v[0:1]
+; ("idxen offen"): v0 holds the zero index, v1 holds 0xfff000, and the remaining
+; 4092 stays in the immediate offset field.
+define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
+; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: s_mov_b32 s4, 0
+; PREGFX10-NEXT: v_mov_b32_e32 v1, 0xfff000
+; PREGFX10-NEXT: v_mov_b32_e32 v0, s4
+; PREGFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT: s_waitcnt vmcnt(0)
+; PREGFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_mov_b32_e32 v1, 0xfff000
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
+; GFX11-NEXT: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
+main_body:
+ %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 16777212, i32 0, i32 63, i32 0)
+ ret <4 x float> %data
+}
+
declare i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32, i32)
declare <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32, i32)
declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32)
ret void
}
+; Struct tbuffer store with a constant index operand of 0 and a constant offset
+; operand of 4092. The checks expect the zero index in v4 ("idxen"; v[0:3] carry
+; the store data) and the whole offset folded into the immediate field
+; (offset:4092, no "offen").
+; NOTE(review): named buffer_store_* although it calls the tbuffer store
+; intrinsic - confirm the name is intended.
+define amdgpu_ps void @buffer_store_voffset_large_12bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; VERDE-LABEL: buffer_store_voffset_large_12bit:
+; VERDE: ; %bb.0: ; %main_body
+; VERDE-NEXT: v_mov_b32_e32 v4, 0
+; VERDE-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offset:4092
+; VERDE-NEXT: s_endpgm
+;
+; PREGFX10-LABEL: buffer_store_voffset_large_12bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: v_mov_b32_e32 v4, 0
+; PREGFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offset:4092
+; PREGFX10-NEXT: s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_12bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offset:4092
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_12bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 0, i32 4092, i32 0, i32 63, i32 0)
+ ret void
+}
+
+; Struct tbuffer store with a constant offset operand of 8188 (0x1ffc). In the
+; expected output of every run the value is split in two: 0x1000 is moved into
+; v5 of the v[4:5] address pair (offen) and the remaining 4092 is carried in
+; the immediate offset field. v4 is loaded from s4, which is set to 0.
+; NOTE(review): the name says "buffer_store" while the body calls
+; llvm.amdgcn.struct.tbuffer.store - confirm whether a "tbuffer_store" prefix
+; was intended.
+define amdgpu_ps void @buffer_store_voffset_large_13bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; VERDE-LABEL: buffer_store_voffset_large_13bit:
+; VERDE: ; %bb.0: ; %main_body
+; VERDE-NEXT: s_mov_b32 s4, 0
+; VERDE-NEXT: v_mov_b32_e32 v5, 0x1000
+; VERDE-NEXT: v_mov_b32_e32 v4, s4
+; VERDE-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; VERDE-NEXT: s_endpgm
+;
+; PREGFX10-LABEL: buffer_store_voffset_large_13bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: s_mov_b32 s4, 0
+; PREGFX10-NEXT: v_mov_b32_e32 v5, 0x1000
+; PREGFX10-NEXT: v_mov_b32_e32 v4, s4
+; PREGFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT: s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_13bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_mov_b32_e32 v5, 0x1000
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_13bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v5, 0x1000 :: v_dual_mov_b32 v4, s4
+; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 0, i32 8188, i32 0, i32 63, i32 0)
+ ret void
+}
+
+; Struct tbuffer store with a constant offset operand of 65532 (0xfffc). In
+; the expected output of every run the value is split in two: 0xf000 is moved
+; into v5 of the v[4:5] address pair (offen) and the remaining 4092 is carried
+; in the immediate offset field. v4 is loaded from s4, which is set to 0.
+; NOTE(review): the name says "buffer_store" while the body calls
+; llvm.amdgcn.struct.tbuffer.store - confirm whether a "tbuffer_store" prefix
+; was intended.
+define amdgpu_ps void @buffer_store_voffset_large_16bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; VERDE-LABEL: buffer_store_voffset_large_16bit:
+; VERDE: ; %bb.0: ; %main_body
+; VERDE-NEXT: s_mov_b32 s4, 0
+; VERDE-NEXT: v_mov_b32_e32 v5, 0xf000
+; VERDE-NEXT: v_mov_b32_e32 v4, s4
+; VERDE-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; VERDE-NEXT: s_endpgm
+;
+; PREGFX10-LABEL: buffer_store_voffset_large_16bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: s_mov_b32 s4, 0
+; PREGFX10-NEXT: v_mov_b32_e32 v5, 0xf000
+; PREGFX10-NEXT: v_mov_b32_e32 v4, s4
+; PREGFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT: s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_16bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_mov_b32_e32 v5, 0xf000
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_16bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v5, 0xf000 :: v_dual_mov_b32 v4, s4
+; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 0, i32 65532, i32 0, i32 63, i32 0)
+ ret void
+}
+
+; Struct tbuffer store with a constant offset operand of 8388604 (0x7ffffc).
+; In the expected output of every run the value is split in two: 0x7ff000 is
+; moved into v5 of the v[4:5] address pair (offen) and the remaining 4092 is
+; carried in the immediate offset field. v4 is loaded from s4, which is set
+; to 0.
+; NOTE(review): the name says "buffer_store" while the body calls
+; llvm.amdgcn.struct.tbuffer.store - confirm whether a "tbuffer_store" prefix
+; was intended.
+define amdgpu_ps void @buffer_store_voffset_large_23bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; VERDE-LABEL: buffer_store_voffset_large_23bit:
+; VERDE: ; %bb.0: ; %main_body
+; VERDE-NEXT: s_mov_b32 s4, 0
+; VERDE-NEXT: v_mov_b32_e32 v5, 0x7ff000
+; VERDE-NEXT: v_mov_b32_e32 v4, s4
+; VERDE-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; VERDE-NEXT: s_endpgm
+;
+; PREGFX10-LABEL: buffer_store_voffset_large_23bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: s_mov_b32 s4, 0
+; PREGFX10-NEXT: v_mov_b32_e32 v5, 0x7ff000
+; PREGFX10-NEXT: v_mov_b32_e32 v4, s4
+; PREGFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT: s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_23bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_mov_b32_e32 v5, 0x7ff000
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_23bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v5, 0x7ff000 :: v_dual_mov_b32 v4, s4
+; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 0, i32 8388604, i32 0, i32 63, i32 0)
+ ret void
+}
+
+; Struct tbuffer store with a constant offset operand of 16777212 (0xfffffc).
+; In the expected output of every run the value is split in two: 0xfff000 is
+; moved into v5 of the v[4:5] address pair (offen) and the remaining 4092 is
+; carried in the immediate offset field. v4 is loaded from s4, which is set
+; to 0.
+; NOTE(review): the name says "buffer_store" while the body calls
+; llvm.amdgcn.struct.tbuffer.store - confirm whether a "tbuffer_store" prefix
+; was intended.
+define amdgpu_ps void @buffer_store_voffset_large_24bit(<4 x i32> inreg %rsrc, <4 x float> %data) {
+; VERDE-LABEL: buffer_store_voffset_large_24bit:
+; VERDE: ; %bb.0: ; %main_body
+; VERDE-NEXT: s_mov_b32 s4, 0
+; VERDE-NEXT: v_mov_b32_e32 v5, 0xfff000
+; VERDE-NEXT: v_mov_b32_e32 v4, s4
+; VERDE-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; VERDE-NEXT: s_endpgm
+;
+; PREGFX10-LABEL: buffer_store_voffset_large_24bit:
+; PREGFX10: ; %bb.0: ; %main_body
+; PREGFX10-NEXT: s_mov_b32 s4, 0
+; PREGFX10-NEXT: v_mov_b32_e32 v5, 0xfff000
+; PREGFX10-NEXT: v_mov_b32_e32 v4, s4
+; PREGFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
+; PREGFX10-NEXT: s_endpgm
+;
+; GFX10-LABEL: buffer_store_voffset_large_24bit:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_mov_b32_e32 v5, 0xfff000
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: buffer_store_voffset_large_24bit:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v5, 0xfff000 :: v_dual_mov_b32 v4, s4
+; GFX11-NEXT: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+main_body:
+ call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 0, i32 16777212, i32 0, i32 63, i32 0)
+ ret void
+}
+
declare void @llvm.amdgcn.struct.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.struct.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0