Allow MIMG instructions to be selected with 6/7 VGPRs for vaddr.
Previously these were rounded up to VReg_256; avoiding that rounding saves VGPRs.
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D103800
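
For illustration only (this is not code from the patch), a minimal C++ sketch of the
vaddr dword-count to VGPR-class mapping the change enables, as reflected in the
MIMGInstructions.td and register-class hunks below. The 1- and 2-dword cases
(VGPR_32/VReg_64) are assumed from the surrounding code and are not shown in the
quoted hunks; pickVAddrClassName is a hypothetical helper used purely for exposition.

    // Hedged sketch: 6- and 7-dword vaddr now select VReg_192/VReg_224 instead
    // of being rounded up to VReg_256; 9..16 dwords still round up to VReg_512.
    #include <cstdio>

    static const char *pickVAddrClassName(unsigned AddrDwords) {
      if (AddrDwords <= 1) return "VGPR_32";   // assumed, outside quoted hunk
      if (AddrDwords == 2) return "VReg_64";   // assumed, outside quoted hunk
      if (AddrDwords == 3) return "VReg_96";
      if (AddrDwords == 4) return "VReg_128";
      if (AddrDwords == 5) return "VReg_160";
      if (AddrDwords == 6) return "VReg_192";  // previously rounded up to VReg_256
      if (AddrDwords == 7) return "VReg_224";  // previously rounded up to VReg_256
      if (AddrDwords == 8) return "VReg_256";
      return "VReg_512";                       // 9..16 dwords
    }

    int main() {
      for (unsigned Dwords = 1; Dwords <= 9; ++Dwords)
        std::printf("%u dwords -> %s\n", Dwords, pickVAddrClassName(Dwords));
    }

Concretely, a 2D image_sample_d (six address dwords) now encodes its vaddr as
v[0:5] rather than v[0:7], as the updated tests below show.
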
if (!IsNSA) {
if (ExpectedAddrSize > 8)
ExpectedAddrSize = 16;
- else if (ExpectedAddrSize > 5)
- ExpectedAddrSize = 8;
- // Allow oversized 8 VGPR vaddr when only 5 VGPR are required.
+ // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
// This provides backward compatibility for assembly created
- // before 160b types were directly supported.
- if (ExpectedAddrSize == 5 && ActualAddrSize == 8)
+ // before 160b/192b/224b types were directly supported.
+ if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
return true;
}
if (!IsNSA) {
if (AddrSize > 8)
AddrSize = 16;
- else if (AddrSize > 5)
- AddrSize = 8;
} else {
if (AddrSize > Info->VAddrDwords) {
// The NSA encoding does not contain enough operands for the combination
!if(!eq(NumWords, 3), VReg_96,
!if(!eq(NumWords, 4), VReg_128,
!if(!eq(NumWords, 5), VReg_160,
+ !if(!eq(NumWords, 6), VReg_192,
+ !if(!eq(NumWords, 7), VReg_224,
!if(!le(NumWords, 8), VReg_256,
- !if(!le(NumWords, 16), VReg_512, ?))))))));
+ !if(!le(NumWords, 16), VReg_512, ?))))))))));
// Whether the instruction variant with this vaddr size should be enabled for
// the auto-generated disassembler.
bit ret = !foldl(0, lst, lhs, y, !or(lhs, !and(!le(min, y), !le(y, max))));
}
-class MIMGAddrSizes_tmp<list<MIMGAddrSize> lst, int min> {
- list<MIMGAddrSize> List = lst;
- int Min = min;
+class MIMGAddrSizes_dw_range<list<int> range> {
+ int Min = !head(range);
+ int Max = !if(!empty(!tail(range)), Min, !head(!tail(range)));
}
class MIMG_Sampler_AddrSizes<AMDGPUSampleVariant sample> {
// required numbers of address words. The disassembler defaults to the
// smallest register class.
list<MIMGAddrSize> MachineInstrs =
- !foldl(MIMGAddrSizes_tmp<[], 0>, [1, 2, 3, 4, 5, 8, 16], lhs, dw,
- !if(isRangeInList<lhs.Min, dw, AllNumAddrWords>.ret,
- MIMGAddrSizes_tmp<
- !listconcat(lhs.List, [MIMGAddrSize<dw, !empty(lhs.List)>]),
- !if(!or(!eq(dw, 3), !eq(dw, 5)), dw, !add(dw, 1))>,
- // we still want _V4/_V8 for codegen with 3/5 dwords
- lhs)).List;
+ !foldl([]<MIMGAddrSize>,
+ !foreach(range,
+ // V4 is generated for V3 and V4
+ // V8 is generated for V5 through V8
+ // V16 is generated for V9 through V16
+ [[1],[2],[3],[3,4],[5],[6],[7],[5,8],[9,16]],
+ MIMGAddrSizes_dw_range<range>),
+ lhs, dw,
+ !if(isRangeInList<dw.Min, dw.Max, AllNumAddrWords>.ret,
+ !listconcat(lhs, [MIMGAddrSize<dw.Max, !empty(lhs)>]),
+ lhs));
// For NSA, generate machine instructions for all possible numbers of words
// except 1 (which is already covered by the non-NSA case).
ArrayRef<SDValue> Elts) {
assert(!Elts.empty());
MVT Type;
- unsigned NumElts;
-
- if (Elts.size() == 1) {
- Type = MVT::f32;
- NumElts = 1;
- } else if (Elts.size() == 2) {
- Type = MVT::v2f32;
- NumElts = 2;
- } else if (Elts.size() == 3) {
- Type = MVT::v3f32;
- NumElts = 3;
- } else if (Elts.size() <= 4) {
- Type = MVT::v4f32;
- NumElts = 4;
- } else if (Elts.size() <= 5) {
- Type = MVT::v5f32;
- NumElts = 5;
- } else if (Elts.size() <= 8) {
- Type = MVT::v8f32;
- NumElts = 8;
+ unsigned NumElts = Elts.size();
+
+ if (NumElts <= 8) {
+ Type = MVT::getVectorVT(MVT::f32, NumElts);
} else {
assert(Elts.size() <= 16);
Type = MVT::v16f32;
VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32;
if (AddrWords > 8)
AddrWords = 16;
- else if (AddrWords > 5)
- AddrWords = 8;
}
if (VAddrWords != AddrWords) {
return &AMDGPU::AReg_160RegClass;
if (BitWidth <= 192)
return &AMDGPU::AReg_192RegClass;
+ if (BitWidth <= 224)
+ return &AMDGPU::AReg_224RegClass;
if (BitWidth <= 256)
return &AMDGPU::AReg_256RegClass;
if (BitWidth <= 512)
return &AMDGPU::AReg_160_Align2RegClass;
if (BitWidth <= 192)
return &AMDGPU::AReg_192_Align2RegClass;
+ if (BitWidth <= 224)
+ return &AMDGPU::AReg_224_Align2RegClass;
if (BitWidth <= 256)
return &AMDGPU::AReg_256_Align2RegClass;
if (BitWidth <= 512)
return &AMDGPU::SGPR_160RegClass;
if (BitWidth <= 192)
return &AMDGPU::SGPR_192RegClass;
+ if (BitWidth <= 224)
+ return &AMDGPU::SGPR_224RegClass;
if (BitWidth <= 256)
return &AMDGPU::SGPR_256RegClass;
if (BitWidth <= 512)
RC = &AMDGPU::VReg_128RegClass;
} else if (Info->VAddrDwords == 5) {
RC = &AMDGPU::VReg_160RegClass;
- } else if (Info->VAddrDwords <= 8) {
+ } else if (Info->VAddrDwords == 6) {
+ RC = &AMDGPU::VReg_192RegClass;
+ } else if (Info->VAddrDwords == 7) {
+ RC = &AMDGPU::VReg_224RegClass;
+ } else if (Info->VAddrDwords == 8) {
RC = &AMDGPU::VReg_256RegClass;
- NewAddrDwords = 8;
} else {
RC = &AMDGPU::VReg_512RegClass;
NewAddrDwords = 16;
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
-; GFX10-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
; GFX9-NEXT: v_mov_b32_e32 v11, v10
; GFX9-NEXT: v_mov_b32_e32 v12, v10
; GFX9-NEXT: v_mov_b32_e32 v13, v10
-; GFX9-NEXT: image_sample_d v[2:5], v[2:9], s[0:7], s[8:11] dmask:0xf
-; GFX9-NEXT: image_sample_d v[6:9], v[8:15], s[0:7], s[8:11] dmask:0xf
+; GFX9-NEXT: image_sample_d v[2:5], v[2:7], s[0:7], s[8:11] dmask:0xf
+; GFX9-NEXT: image_sample_d v[6:9], v[8:13], s[0:7], s[8:11] dmask:0xf
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_add_f32_e32 v5, v5, v9
; GFX9-NEXT: v_add_f32_e32 v4, v4, v8
}
; GCN-LABEL: {{^}}gather4_c_b_cl_o_2d:
-; GCN: image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
+; GCN: image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1{{$}}
define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; GFX9-NEXT: v_lshl_or_b32 v11, v7, 16, v5
; GFX9-NEXT: v_lshl_or_b32 v9, v4, 16, v3
; GFX9-NEXT: v_lshl_or_b32 v7, v1, 16, v0
-; GFX9-NEXT: image_sample_d v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT: image_sample_d v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX9-NEXT: v_and_b32_e32 v0, v0, v2
; GFX9-NEXT: v_lshl_or_b32 v11, v5, 16, v1
; GFX9-NEXT: v_lshl_or_b32 v10, v3, 16, v0
-; GFX9-NEXT: image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 a16 da
+; GFX9-NEXT: image_sample_c_d_o v0, v[8:13], s[0:7], s[8:11] dmask:0x4 a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX9-NEXT: v_and_b32_e32 v0, v0, v2
; GFX9-NEXT: v_lshl_or_b32 v11, v5, 16, v1
; GFX9-NEXT: v_lshl_or_b32 v10, v3, 16, v0
-; GFX9-NEXT: image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 a16 da
+; GFX9-NEXT: image_sample_c_d_o v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_d_2d:
; VERDE: ; %bb.0: ; %main_body
-; VERDE-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_2d:
; GFX6789: ; %bb.0: ; %main_body
-; GFX6789-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_c_d_2d:
; VERDE: ; %bb.0: ; %main_body
-; VERDE-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_d_2d:
; GFX6789: ; %bb.0: ; %main_body
-; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_d_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_d_cl_2d:
; VERDE: ; %bb.0: ; %main_body
-; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_d_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
-; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_cd_2d:
; VERDE: ; %bb.0: ; %main_body
-; VERDE-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT: image_sample_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cd_2d:
; GFX6789: ; %bb.0: ; %main_body
-; GFX6789-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT: image_sample_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: image_sample_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
; VERDE-LABEL: sample_c_cd_2d:
; VERDE: ; %bb.0: ; %main_body
-; VERDE-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT: image_sample_c_cd v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_c_cd_2d:
; GFX6789: ; %bb.0: ; %main_body
-; GFX6789-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT: image_sample_c_cd v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
; VERDE-LABEL: sample_cd_cl_2d:
; VERDE: ; %bb.0: ; %main_body
-; VERDE-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT: image_sample_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; VERDE-NEXT: s_waitcnt vmcnt(0)
; VERDE-NEXT: ; return to shader part epilog
;
; GFX6789-LABEL: sample_cd_cl_2d:
; GFX6789: ; %bb.0: ; %main_body
-; GFX6789-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT: image_sample_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
; GFX6789-NEXT: s_waitcnt vmcnt(0)
; GFX6789-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00]
; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
; GFX10-NEXT: ; return to shader part epilog
main_body:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5
-; GFX10-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX10GISEL-NEXT: v_and_or_b32 v5, 0xffff, v5, v6
-; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
; GFX10GISEL-NEXT: v_and_or_b32 v4, v4, v7, v5
; GFX10GISEL-NEXT: v_and_or_b32 v5, v6, v7, s12
-; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6
; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12
-; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5
-; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX10GISEL-NEXT: v_and_or_b32 v5, 0xffff, v5, v6
-; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
; GFX10GISEL-NEXT: v_and_or_b32 v4, v4, v7, v5
; GFX10GISEL-NEXT: v_and_or_b32 v5, v6, v7, s12
-; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6
; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12
-; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
}
; GCN-LABEL: {{^}}sample_c_b_cl_o_2d:
-; GCN: image_sample_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
}
; GCN-LABEL: {{^}}sample_d_o_2d:
-; GCN: image_sample_d_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_d_o v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_d_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
}
; GCN-LABEL: {{^}}sample_c_d_cl_o_1d:
-; GCN: image_sample_c_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_c_d_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_d_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
}
; GCN-LABEL: {{^}}sample_cd_o_2d:
-; GCN: image_sample_cd_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_cd_o v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_cd_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
}
; GCN-LABEL: {{^}}sample_c_cd_cl_o_1d:
-; GCN: image_sample_c_cd_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_c_cd_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf{{$}}
define amdgpu_ps <4 x float> @sample_c_cd_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D
; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x00,0x00]
-image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D
-; GFX10: image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x07,0x88,0xf0,0x20,0x40,0x21,0x03]
+image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D
+; GFX10: image_sample_d v[64:66], v[32:37], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x07,0x88,0xf0,0x20,0x40,0x21,0x03]
image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x14,0x15]
image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x40]
-image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
-; GFX10: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x40]
+image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x40]
image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x40]
image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x40]
-image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
-; GFX10: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x40]
+image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x40]
image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x40]
# GFX10: image_sample_c_b v[16:19], v[8:12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03]
0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03
-# TODO: address of this instruction is v[250:255], but this register class does
-# not exist, and the next-larger size goes beyond the last register, so
-# the disassembly is not adjusted properly
-# GFX10: image_sample_c_b_cl v16, v[250:252], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03]
+# GFX10: image_sample_c_b_cl v[16:19], v[250:255], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03]
0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03
# GFX10: image_sample_c_lz v[16:19], v[253:255], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xbc,0xf0,0xfd,0x10,0x25,0x03]
# GFX10: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x40]
0x00,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x40
-# GFX10: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x40]
+# GFX10: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x40]
0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x40
# GFX10: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x40]
# GFX10: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x40]
0x00,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x40
-# GFX10: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x40]
+# GFX10: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x40]
0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x40
# GFX10: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; encoding: [0x00,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x40]