Add G16 feature for GFX10 and support A16 and G16 in GlobalISel.
Differential Revision: https://reviews.llvm.org/D76836
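As a quick illustration (a hedged sketch, not part of the patch; the overload
suffixes follow the usual image-intrinsic mangling), a sample that takes 16-bit
derivatives but 32-bit coordinates should now select the _g16 variant, while
16-bit coordinates still require A16 (which in turn implies G16):

  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(
           i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv,
           float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp,
           i1 0, i32 0, i32 0)
  ; expected to select image_sample_d_g16 on targets with the g16 feature

In GlobalISel the legalizer now records these properties as a trailing
immediate on G_AMDGPU_INTRIN_IMAGE_LOAD/STORE (bit 0 = A16, bit 1 = G16), and
the instruction selector reads that flag back instead of re-inspecting the
operand types.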
"Support gfx10-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands"
>;
+def FeatureG16 : SubtargetFeature<"g16",
+ "HasG16",
+ "true",
+ "Support G16 for 16-bit gradient image operands"
+>;
+
def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
"HasNSAEncoding",
"true",
FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
FeatureVOP3Literal, FeatureDPP8,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
- FeatureGFX10A16, FeatureFastDenormalF32
+ FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16
]
>;
def HasGFX10A16 : Predicate<"Subtarget->hasGFX10A16()">,
AssemblerPredicate<(all_of FeatureGFX10A16)>;
+def HasG16 : Predicate<"Subtarget->hasG16()">,
+ AssemblerPredicate<(all_of FeatureG16)>;
+
def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<(all_of (not FeatureGCN3Encoding), FeatureGFX10Insts, FeatureDPP)>;
unsigned IntrOpcode = Intr->BaseOpcode;
const bool IsGFX10 = STI.getGeneration() >= AMDGPUSubtarget::GFX10;
- const LLT S16 = LLT::scalar(16);
const int VAddrIdx = getImageVAddrIdxBegin(BaseOpcode,
MI.getNumExplicitDefs());
int NumVAddr, NumGradients;
std::tie(NumVAddr, NumGradients) = getImageNumVAddr(Intr, BaseOpcode);
- const LLT AddrTy = MRI->getType(MI.getOperand(VAddrIdx).getReg());
- const bool IsA16 = AddrTy.getScalarType() == S16;
-
Register VDataIn, VDataOut;
LLT VDataTy;
int NumVDataDwords = -1;
if (!parseTexFail(MI.getOperand(CtrlIdx).getImm(), TFE, LWE, IsTexFail))
return false;
+ const int Flags = MI.getOperand(CtrlIdx + 2).getImm();
+ const bool IsA16 = (Flags & 1) != 0;
+ const bool IsG16 = (Flags & 2) != 0;
+
+ // A16 implies 16-bit gradients, so G16 must be set as well
+ if (IsA16 && !IsG16)
+ return false;
+
unsigned DMask = 0;
unsigned DMaskLanes = 0;
}
}
+ // Set G16 opcode
+ if (IsG16 && !IsA16) {
+ const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
+ AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
+ assert(G16MappingInfo);
+ IntrOpcode = G16MappingInfo->G16; // set opcode to variant with _g16
+ }
+
// TODO: Check this in verifier.
assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
/// vector with s16 typed elements.
static void packImageA16AddressToDwords(MachineIRBuilder &B, MachineInstr &MI,
SmallVectorImpl<Register> &PackedAddrs,
- int AddrIdx, int DimIdx, int NumVAddrs,
+ int AddrIdx, int DimIdx, int EndIdx,
int NumGradients) {
const LLT S16 = LLT::scalar(16);
const LLT V2S16 = LLT::vector(2, 16);
- for (int I = AddrIdx; I < AddrIdx + NumVAddrs; ++I) {
+ for (int I = AddrIdx; I < EndIdx; ++I) {
MachineOperand &SrcOp = MI.getOperand(I);
if (!SrcOp.isReg())
continue; // _L to _LZ may have eliminated this.
} else {
// Dz/dh, dz/dv and the last odd coord are packed with undef. Also, in 1D,
// derivatives dx/dh and dx/dv are packed with undef.
- if (((I + 1) >= (AddrIdx + NumVAddrs)) ||
+ if (((I + 1) >= EndIdx) ||
((NumGradients / 2) % 2 == 1 &&
(I == DimIdx + (NumGradients / 2) - 1 ||
I == DimIdx + NumGradients - 1)) ||
// Index of first address argument
const int AddrIdx = getImageVAddrIdxBegin(BaseOpcode, NumDefs);
- // Check for 16 bit addresses and pack if true.
- int DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
- LLT AddrTy = MRI->getType(MI.getOperand(DimIdx).getReg());
- const bool IsA16 = AddrTy == S16;
-
int NumVAddrs, NumGradients;
std::tie(NumVAddrs, NumGradients) = getImageNumVAddr(ImageDimIntr, BaseOpcode);
const int DMaskIdx = BaseOpcode->Atomic ? -1 :
getDMaskIdx(BaseOpcode, NumDefs);
unsigned DMask = 0;
+ // Check for 16-bit addresses and derivatives and pack them if so.
+ int DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
+ LLT GradTy = MRI->getType(MI.getOperand(DimIdx).getReg());
+ LLT AddrTy = MRI->getType(MI.getOperand(DimIdx + NumGradients).getReg());
+ const bool IsG16 = GradTy == S16;
+ const bool IsA16 = AddrTy == S16;
+
int DMaskLanes = 0;
if (!BaseOpcode->Atomic) {
DMask = MI.getOperand(DMaskIdx).getImm();
}
}
- // If the register allocator cannot place the address registers contiguously
- // without introducing moves, then using the non-sequential address encoding
- // is always preferable, since it saves VALU instructions and is usually a
- // wash in terms of code size or even better.
- //
- // However, we currently have no way of hinting to the register allocator
- // that MIMG addresses should be placed contiguously when it is possible to
- // do so, so force non-NSA for the common 2-address case as a heuristic.
- //
- // SIShrinkInstructions will convert NSA encodings to non-NSA after register
- // allocation when possible.
- const bool UseNSA = CorrectedNumVAddrs >= 3 && ST.hasNSAEncoding();
-
// Rewrite the addressing register layout before doing anything else.
- if (IsA16) {
- // FIXME: this feature is missing from gfx10. When that is fixed, this check
- // should be introduced.
- if (!ST.hasR128A16() && !ST.hasGFX10A16())
+ if (IsA16 || IsG16) {
+ if (IsA16) {
+ // The target must support A16, and the gradients must be 16-bit as well
+ if (!ST.hasA16() || !IsG16)
+ return false;
+ } else if (!ST.hasG16())
return false;
if (NumVAddrs > 1) {
SmallVector<Register, 4> PackedRegs;
- packImageA16AddressToDwords(B, MI, PackedRegs, AddrIdx, DimIdx, NumVAddrs,
- NumGradients);
+ // Don't compress addresses for G16
+ const int PackEndIdx =
+ IsA16 ? (AddrIdx + NumVAddrs) : (DimIdx + NumGradients);
+ packImageA16AddressToDwords(B, MI, PackedRegs, AddrIdx, DimIdx,
+ PackEndIdx, NumGradients);
+
+ if (!IsA16) {
+ // Add the uncompressed addresses
+ for (int I = DimIdx + NumGradients; I != AddrIdx + NumVAddrs; ++I) {
+ Register AddrReg = MI.getOperand(I).getReg();
+ assert(B.getMRI()->getType(AddrReg) == LLT::scalar(32));
+ PackedRegs.push_back(AddrReg);
+ }
+ }
+
+ // See also below in the non-a16 branch
+ const bool UseNSA = PackedRegs.size() >= 3 && ST.hasNSAEncoding();
if (!UseNSA && PackedRegs.size() > 1) {
LLT PackedAddrTy = LLT::vector(2 * PackedRegs.size(), 16);
SrcOp.setReg(AMDGPU::NoRegister);
}
}
- } else if (!UseNSA && NumVAddrs > 1) {
- convertImageAddrToPacked(B, MI, AddrIdx, NumVAddrs);
+ } else {
+ // If the register allocator cannot place the address registers contiguously
+ // without introducing moves, then using the non-sequential address encoding
+ // is always preferable, since it saves VALU instructions and is usually a
+ // wash in terms of code size or even better.
+ //
+ // However, we currently have no way of hinting to the register allocator
+ // that MIMG addresses should be placed contiguously when it is possible to
+ // do so, so force non-NSA for the common 2-address case as a heuristic.
+ //
+ // SIShrinkInstructions will convert NSA encodings to non-NSA after register
+ // allocation when possible.
+ const bool UseNSA = CorrectedNumVAddrs >= 3 && ST.hasNSAEncoding();
+
+ if (!UseNSA && NumVAddrs > 1)
+ convertImageAddrToPacked(B, MI, AddrIdx, NumVAddrs);
}
+ int Flags = 0;
+ if (IsA16)
+ Flags |= 1;
+ if (IsG16)
+ Flags |= 2;
+ MI.addOperand(MachineOperand::CreateImm(Flags));
if (BaseOpcode->Store) { // No TFE for stores?
// TODO: Handle dmask trim
HasDPP8(false),
HasR128A16(false),
HasGFX10A16(false),
+ HasG16(false),
HasNSAEncoding(false),
HasDLInsts(false),
HasDot1Insts(false),
bool HasDPP8;
bool HasR128A16;
bool HasGFX10A16;
+ bool HasG16;
bool HasNSAEncoding;
bool HasDLInsts;
bool HasDot1Insts;
return HasGFX10A16;
}
+ bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
+
+ bool hasG16() const { return HasG16; }
+
bool hasOffset3fBug() const {
return HasOffset3fBug;
}
bit Gather4 = 0;
bits<8> NumExtraArgs = 0;
bit Gradients = 0;
+ bit G16 = 0;
bit Coordinates = 1;
bit LodOrClampOrMip = 0;
bit HasD16 = 0;
def MIMGBaseOpcodesTable : GenericTable {
let FilterClass = "MIMGBaseOpcode";
let CppTypeName = "MIMGBaseOpcodeInfo";
- let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler", "Gather4",
- "NumExtraArgs", "Gradients", "Coordinates", "LodOrClampOrMip",
- "HasD16"];
+ let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
+ "Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
+ "LodOrClampOrMip", "HasD16"];
GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
let PrimaryKey = ["BaseOpcode"];
let PrimaryKeyName = "getMIMGMIPMappingInfo";
}
+class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {
+ MIMGBaseOpcode G = g;
+ MIMGBaseOpcode G16 = g16;
+}
+
+def MIMGG16MappingTable : GenericTable {
+ let FilterClass = "MIMGG16Mapping";
+ let CppTypeName = "MIMGG16MappingInfo";
+ let Fields = ["G", "G16"];
+ GenericEnum TypeOf_G = MIMGBaseOpcode;
+ GenericEnum TypeOf_G16 = MIMGBaseOpcode;
+
+ let PrimaryKey = ["G"];
+ let PrimaryKeyName = "getMIMGG16MappingInfo";
+}
+
class MIMG_Base <dag outs, string dns = "">
: InstSI <outs, (ins), "", []> {
}
multiclass MIMG_Sampler <bits<8> op, AMDGPUSampleVariant sample, bit wqm = 0,
- bit isGetLod = 0,
- string asm = "image_sample"#sample.LowerCaseMod> {
+ bit isG16 = 0, bit isGetLod = 0,
+ string asm = "image_sample"#sample.LowerCaseMod#!if(isG16, "_g16", "")> {
def "" : MIMG_Sampler_BaseOpcode<sample> {
let HasD16 = !if(isGetLod, 0, 1);
+ let G16 = isG16;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI
//} // End let FPAtomic = 1
-defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>;
-defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>;
-defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, AMDGPUSample_d>;
-defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, AMDGPUSample_d_cl>;
-defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, AMDGPUSample_l>;
-defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, AMDGPUSample_b>;
-defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, AMDGPUSample_b_cl>;
-defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, AMDGPUSample_lz>;
-defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, AMDGPUSample_c>;
-defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, AMDGPUSample_c_cl>;
-defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, AMDGPUSample_c_d>;
-defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, AMDGPUSample_c_d_cl>;
-defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, AMDGPUSample_c_l>;
-defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, AMDGPUSample_c_b>;
-defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, AMDGPUSample_c_b_cl>;
-defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, AMDGPUSample_c_lz>;
-defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, AMDGPUSample_o>;
-defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, AMDGPUSample_cl_o>;
-defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, AMDGPUSample_d_o>;
-defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, AMDGPUSample_d_cl_o>;
-defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, AMDGPUSample_l_o>;
-defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, AMDGPUSample_b_o>;
-defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, AMDGPUSample_b_cl_o>;
-defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, AMDGPUSample_lz_o>;
-defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, AMDGPUSample_c_o>;
-defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, AMDGPUSample_c_cl_o>;
-defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, AMDGPUSample_c_d_o>;
-defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, AMDGPUSample_c_d_cl_o>;
-defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, AMDGPUSample_c_l_o>;
-defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, AMDGPUSample_c_b_cl_o>;
-defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, AMDGPUSample_c_b_o>;
-defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, AMDGPUSample_c_lz_o>;
-defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, AMDGPUSample>;
-defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, AMDGPUSample_cl>;
-defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, AMDGPUSample_l>;
-defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, AMDGPUSample_b>;
-defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, AMDGPUSample_b_cl>;
-defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, AMDGPUSample_lz>;
-defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, AMDGPUSample_c>;
-defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, AMDGPUSample_c_cl>;
-defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, AMDGPUSample_c_l>;
-defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, AMDGPUSample_c_b>;
-defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, AMDGPUSample_c_b_cl>;
-defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, AMDGPUSample_c_lz>;
-defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, AMDGPUSample_o>;
-defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, AMDGPUSample_cl_o>;
-defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, AMDGPUSample_l_o>;
-defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, AMDGPUSample_b_o>;
-defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, AMDGPUSample_b_cl_o>;
-defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, AMDGPUSample_lz_o>;
-defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, AMDGPUSample_c_o>;
-defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, AMDGPUSample_c_cl_o>;
-defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, AMDGPUSample_c_l_o>;
-defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, AMDGPUSample_c_b_o>;
-defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, AMDGPUSample_c_b_cl_o>;
-defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, AMDGPUSample_c_lz_o>;
-
-defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, AMDGPUSample, 1, 1, "image_get_lod">;
-
-defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, AMDGPUSample_cd>;
-defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, AMDGPUSample_cd_cl>;
-defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, AMDGPUSample_c_cd>;
-defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, AMDGPUSample_c_cd_cl>;
-defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, AMDGPUSample_cd_o>;
-defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, AMDGPUSample_cd_cl_o>;
-defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, AMDGPUSample_c_cd_o>;
-defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, AMDGPUSample_c_cd_cl_o>;
+defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>;
+defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, AMDGPUSample_d>;
+defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, AMDGPUSample_d_cl>;
+defm IMAGE_SAMPLE_D_G16 : MIMG_Sampler <0x000000a2, AMDGPUSample_d, 0, 1>;
+defm IMAGE_SAMPLE_D_CL_G16 : MIMG_Sampler <0x000000a3, AMDGPUSample_d_cl, 0, 1>;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, AMDGPUSample_l>;
+defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, AMDGPUSample_b>;
+defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, AMDGPUSample_b_cl>;
+defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, AMDGPUSample_lz>;
+defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, AMDGPUSample_c>;
+defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, AMDGPUSample_c_cl>;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, AMDGPUSample_c_d>;
+defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, AMDGPUSample_c_d_cl>;
+defm IMAGE_SAMPLE_C_D_G16 : MIMG_Sampler <0x000000aa, AMDGPUSample_c_d, 0, 1>;
+defm IMAGE_SAMPLE_C_D_CL_G16 : MIMG_Sampler <0x000000ab, AMDGPUSample_c_d_cl, 0, 1>;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, AMDGPUSample_c_l>;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, AMDGPUSample_c_b>;
+defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, AMDGPUSample_c_b_cl>;
+defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, AMDGPUSample_c_lz>;
+defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, AMDGPUSample_o>;
+defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, AMDGPUSample_cl_o>;
+defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, AMDGPUSample_d_o>;
+defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, AMDGPUSample_d_cl_o>;
+defm IMAGE_SAMPLE_D_O_G16 : MIMG_Sampler <0x000000b2, AMDGPUSample_d_o, 0, 1>;
+defm IMAGE_SAMPLE_D_CL_O_G16 : MIMG_Sampler <0x000000b3, AMDGPUSample_d_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, AMDGPUSample_l_o>;
+defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, AMDGPUSample_b_o>;
+defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, AMDGPUSample_b_cl_o>;
+defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, AMDGPUSample_lz_o>;
+defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, AMDGPUSample_c_o>;
+defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, AMDGPUSample_c_cl_o>;
+defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, AMDGPUSample_c_d_o>;
+defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, AMDGPUSample_c_d_cl_o>;
+defm IMAGE_SAMPLE_C_D_O_G16 : MIMG_Sampler <0x000000ba, AMDGPUSample_c_d_o, 0, 1>;
+defm IMAGE_SAMPLE_C_D_CL_O_G16 : MIMG_Sampler <0x000000bb, AMDGPUSample_c_d_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, AMDGPUSample_c_l_o>;
+defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, AMDGPUSample_c_b_cl_o>;
+defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, AMDGPUSample_c_b_o>;
+defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, AMDGPUSample_c_lz_o>;
+defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, AMDGPUSample>;
+defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, AMDGPUSample_cl>;
+defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, AMDGPUSample_l>;
+defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, AMDGPUSample_b>;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, AMDGPUSample_b_cl>;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, AMDGPUSample_lz>;
+defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, AMDGPUSample_c>;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, AMDGPUSample_c_cl>;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, AMDGPUSample_c_l>;
+defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, AMDGPUSample_c_b>;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, AMDGPUSample_c_b_cl>;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, AMDGPUSample_c_lz>;
+defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, AMDGPUSample_o>;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, AMDGPUSample_cl_o>;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, AMDGPUSample_l_o>;
+defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, AMDGPUSample_b_o>;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, AMDGPUSample_b_cl_o>;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, AMDGPUSample_lz_o>;
+defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, AMDGPUSample_c_o>;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, AMDGPUSample_c_cl_o>;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, AMDGPUSample_c_l_o>;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, AMDGPUSample_c_b_o>;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, AMDGPUSample_c_b_cl_o>;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, AMDGPUSample_c_lz_o>;
+
+defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, AMDGPUSample, 1, 0, 1, "image_get_lod">;
+
+defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, AMDGPUSample_cd>;
+defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, AMDGPUSample_cd_cl>;
+defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, AMDGPUSample_c_cd>;
+defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, AMDGPUSample_c_cd_cl>;
+defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, AMDGPUSample_cd_o>;
+defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, AMDGPUSample_cd_cl_o>;
+defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, AMDGPUSample_c_cd_o>;
+defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, AMDGPUSample_c_cd_cl_o>;
+defm IMAGE_SAMPLE_CD_G16 : MIMG_Sampler <0x000000e8, AMDGPUSample_cd, 0, 1>;
+defm IMAGE_SAMPLE_CD_CL_G16 : MIMG_Sampler <0x000000e9, AMDGPUSample_cd_cl, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_G16 : MIMG_Sampler <0x000000ea, AMDGPUSample_c_cd, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_CL_G16 : MIMG_Sampler <0x000000eb, AMDGPUSample_c_cd_cl, 0, 1>;
+defm IMAGE_SAMPLE_CD_O_G16 : MIMG_Sampler <0x000000ec, AMDGPUSample_cd_o, 0, 1>;
+defm IMAGE_SAMPLE_CD_CL_O_G16 : MIMG_Sampler <0x000000ed, AMDGPUSample_cd_cl_o, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_O_G16 : MIMG_Sampler <0x000000ee, AMDGPUSample_c_cd_o, 0, 1>;
+defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <0x000000ef, AMDGPUSample_c_cd_cl_o, 0, 1>;
//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
// MIP to NONMIP Optimization Mapping
def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
+
+// G to G16 Optimization Mapping
+def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D, IMAGE_SAMPLE_C_D_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL, IMAGE_SAMPLE_C_D_CL_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_D_O, IMAGE_SAMPLE_D_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL_O, IMAGE_SAMPLE_D_CL_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_O, IMAGE_SAMPLE_C_D_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_D_CL_O, IMAGE_SAMPLE_C_D_CL_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD, IMAGE_SAMPLE_CD_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL, IMAGE_SAMPLE_CD_CL_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD, IMAGE_SAMPLE_C_CD_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL, IMAGE_SAMPLE_C_CD_CL_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD_O, IMAGE_SAMPLE_CD_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_CD_CL_O, IMAGE_SAMPLE_CD_CL_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_O, IMAGE_SAMPLE_C_CD_O_G16>;
+def : MIMGG16Mapping<IMAGE_SAMPLE_C_CD_CL_O, IMAGE_SAMPLE_C_CD_CL_O_G16>;
return Value == 0;
}
+static void packImageA16AddressToDwords(SelectionDAG &DAG, SDValue Op,
+ MVT PackVectorVT,
+ SmallVectorImpl<SDValue> &PackedAddrs,
+ unsigned DimIdx, unsigned EndIdx,
+ unsigned NumGradients) {
+ SDLoc DL(Op);
+ for (unsigned I = DimIdx; I < EndIdx; I++) {
+ SDValue Addr = Op.getOperand(I);
+
+ // Gradients are packed with undef for each coordinate.
+ // In <hi 16 bit>,<lo 16 bit> notation, the registers look like this:
+ // 1D: undef,dx/dh; undef,dx/dv
+ // 2D: dy/dh,dx/dh; dy/dv,dx/dv
+ // 3D: dy/dh,dx/dh; undef,dz/dh; dy/dv,dx/dv; undef,dz/dv
+ if (((I + 1) >= EndIdx) ||
+ ((NumGradients / 2) % 2 == 1 && (I == DimIdx + (NumGradients / 2) - 1 ||
+ I == DimIdx + NumGradients - 1))) {
+ if (Addr.getValueType() != MVT::i16)
+ Addr = DAG.getBitcast(MVT::i16, Addr);
+ Addr = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Addr);
+ } else {
+ Addr = DAG.getBuildVector(PackVectorVT, DL, {Addr, Op.getOperand(I + 1)});
+ I++;
+ }
+ Addr = DAG.getBitcast(MVT::f32, Addr);
+ PackedAddrs.push_back(Addr);
+ }
+}
+
SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const {
SmallVector<EVT, 3> ResultTypes(Op->value_begin(), Op->value_end());
SmallVector<EVT, 3> OrigResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
+ bool IsG16 = false;
bool IsA16 = false;
SDValue VData;
int NumVDataDwords;
}
}
- // Check for 16 bit addresses and pack if true.
+ // Push back extra arguments.
+ for (unsigned I = 0; I < BaseOpcode->NumExtraArgs; I++)
+ VAddrs.push_back(Op.getOperand(AddrIdx + I));
+
+ // Check for 16-bit addresses or derivatives and pack them if so.
unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
+ unsigned CoordIdx = DimIdx + NumGradients;
+ unsigned CoordsEnd = AddrIdx + NumMIVAddrs;
+
MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
- const MVT VAddrScalarVT = VAddrVT.getScalarType();
- if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16))) {
- // Illegal to use a16 images
- if (!ST->hasFeature(AMDGPU::FeatureR128A16) && !ST->hasFeature(AMDGPU::FeatureGFX10A16))
+ MVT VAddrScalarVT = VAddrVT.getScalarType();
+ MVT PackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
+ IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
+
+ VAddrVT = Op.getOperand(CoordIdx).getSimpleValueType();
+ VAddrScalarVT = VAddrVT.getScalarType();
+ IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
+ if (IsA16 || IsG16) {
+ if (IsA16) {
+ if (!ST->hasA16()) {
+ LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
+ "support 16 bit addresses\n");
+ return Op;
+ }
+ if (!IsG16) {
+ LLVM_DEBUG(
+ dbgs() << "Failed to lower image intrinsic: 16 bit addresses "
+ "need 16 bit derivatives but got 32 bit derivatives\n");
+ return Op;
+ }
+ } else if (!ST->hasG16()) {
+ LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
+ "support 16 bit derivatives\n");
return Op;
+ }
- IsA16 = true;
- const MVT VectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
- for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
- SDValue AddrLo;
- // Push back extra arguments.
- if (i < DimIdx) {
- AddrLo = Op.getOperand(i);
- } else {
- // Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
- // in 1D, derivatives dx/dh and dx/dv are packed with undef.
- if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
- ((NumGradients / 2) % 2 == 1 &&
- (i == DimIdx + (NumGradients / 2) - 1 ||
- i == DimIdx + NumGradients - 1))) {
- AddrLo = Op.getOperand(i);
- if (AddrLo.getValueType() != MVT::i16)
- AddrLo = DAG.getBitcast(MVT::i16, Op.getOperand(i));
- AddrLo = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, AddrLo);
- } else {
- AddrLo = DAG.getBuildVector(VectorVT, DL,
- {Op.getOperand(i), Op.getOperand(i + 1)});
- i++;
- }
- AddrLo = DAG.getBitcast(MVT::f32, AddrLo);
+ if (BaseOpcode->Gradients && !IsA16) {
+ if (!ST->hasG16()) {
+ LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
+ "support 16 bit derivatives\n");
+ return Op;
}
- VAddrs.push_back(AddrLo);
+ // Activate g16
+ const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
+ AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
+ IntrOpcode = G16MappingInfo->G16; // set new opcode to variant with _g16
+ }
+
+ // Don't compress addresses for G16
+ const int PackEndIdx = IsA16 ? CoordsEnd : CoordIdx;
+ packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs, DimIdx,
+ PackEndIdx, NumGradients);
+
+ if (!IsA16) {
+ // Add the uncompressed addresses
+ for (unsigned I = CoordIdx; I < CoordsEnd; I++)
+ VAddrs.push_back(Op.getOperand(I));
}
} else {
- for (unsigned i = 0; i < NumMIVAddrs; ++i)
- VAddrs.push_back(Op.getOperand(AddrIdx + i));
+ for (unsigned I = DimIdx; I < CoordsEnd; I++)
+ VAddrs.push_back(Op.getOperand(I));
}
// If the register allocator cannot place the address registers contiguously
using namespace llvm;
+#define DEBUG_TYPE "si-instr-info"
+
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"
IsA16 = A16->getImm() != 0;
}
- bool PackDerivatives = IsA16; // Either A16 or G16
+ bool PackDerivatives = IsA16 || BaseOpcode->G16;
bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
unsigned AddrWords = BaseOpcode->NumExtraArgs;
}
if (VAddrWords != AddrWords) {
+ LLVM_DEBUG(dbgs() << "bad vaddr size, expected " << AddrWords
+ << " but got " << VAddrWords << "\n");
ErrInfo = "bad vaddr size";
return false;
}
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
+#define GET_MIMGG16MappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
}
+bool hasG16(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureG16];
+}
+
bool hasPackedD16(const MCSubtargetInfo &STI) {
return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}
uint8_t NumExtraArgs;
bool Gradients;
+ bool G16;
bool Coordinates;
bool LodOrClampOrMip;
bool HasD16;
MIMGBaseOpcode NONMIP;
};
+struct MIMGG16MappingInfo {
+ MIMGBaseOpcode G;
+ MIMGBaseOpcode G16;
+};
+
LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
LLVM_READONLY
-const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L);
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
+
+LLVM_READONLY
+const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
+bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
bool isSI(const MCSubtargetInfo &STI);
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[DEF]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[DEF]](s32)
- ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32)
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32)
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32)
- ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32)
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32)
; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32)
- ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32)
- ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10: $vgpr0 = COPY [[UV]](s32)
; GFX10: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10-LABEL: name: store_mip_1d
; GFX10: bb.1.main_body:
; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[DEF]](s32)
- ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10: S_ENDPGM 0
main_body:
call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10-LABEL: name: store_mip_2d
; GFX10: bb.1.main_body:
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10: S_ENDPGM 0
main_body:
call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10-LABEL: name: store_mip_3d
; GFX10: bb.1.main_body:
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
- ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10: S_ENDPGM 0
main_body:
call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10-LABEL: name: store_mip_1darray
; GFX10: bb.1.main_body:
; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10: S_ENDPGM 0
main_body:
call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10-LABEL: name: store_mip_2darray
; GFX10: bb.1.main_body:
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
- ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10: S_ENDPGM 0
main_body:
call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10-LABEL: name: store_mip_cube
; GFX10: bb.1.main_body:
; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
- ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10: S_ENDPGM 0
main_body:
call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_swap_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_sub_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_smin_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_umin_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_smax_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_umax_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_and_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_or_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_xor_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_inc_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_dec_1d
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_cmpswap_1d
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_2d
; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_3d
; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_cube
; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_1darray
; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_2darray
; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_2dmsaa
; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_2darraymsaa
; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_add_1d_slc
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_cmpswap_2d
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_cmpswap_3d
; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+r128-a16 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s
define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
; GFX9-LABEL: name: load_1d
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2dmsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_1d
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords, i32 0
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_2d
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords, i32 0
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_3d
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords_lo, i32 0
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_cube
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords_lo, i32 0
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_1darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords, i32 0
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_2darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords_lo, i32 0
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_2dmsaa
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2dmsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords_lo, i32 0
; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_2darraymsaa
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords_lo, i32 0
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_mip_1d
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords, i32 0
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_mip_2d
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords_lo, i32 0
; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_mip_3d
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords_lo, i32 0
; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_mip_cube
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords_lo, i32 0
; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_mip_1darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords_lo, i32 0
; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_mip_2darray
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords_lo, i32 0
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 4 from custom "TargetCustom8")
; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: load_1d_V1
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 4 from custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 8 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 8 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 4 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_1d_V1
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 4 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords, i32 0
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 8 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_1d_V2
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 8 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords, i32 0
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX9: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_1d_glc
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords, i32 0
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_1d_slc
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords, i32 0
; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX9: S_ENDPGM 0
; GFX10NSA-LABEL: name: store_1d_glc_slc
; GFX10NSA: bb.1.main_body:
; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
+ ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
; GFX10NSA: S_ENDPGM 0
main_body:
%s = extractelement <2 x i16> %coords, i32 0
; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF1]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF1]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX9: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
; UNPACKED: $vgpr0 = COPY [[ANYEXT]](s32)
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16)
; PACKED: $vgpr0 = COPY [[ANYEXT]](s32)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[DEF]](<2 x s16>)
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>)
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; UNPACKED: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>)
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>)
; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]]
; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>)
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
+ ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0
%tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: $vgpr1 = COPY [[UV1]](s32)
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: $vgpr1 = COPY [[UV1]](s32)
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: $vgpr1 = COPY [[UV1]](s32)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GCN: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GCN: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GCN: $vgpr1 = COPY [[DEF]](s32)
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GCN: $vgpr1 = COPY [[DEF]](s32)
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GCN: $vgpr1 = COPY [[DEF]](s32)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 12 from custom "TargetCustom8", align 16)
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>)
; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GCN: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GCN: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GCN: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX6: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX6: $vgpr0 = COPY [[UV]](s32)
; GFX6: $vgpr1 = COPY [[UV1]](s32)
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: $vgpr1 = COPY [[UV1]](s32)
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX6: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32)
- ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[BUILD_VECTOR1]](<4 x s32>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX6: G_STORE [[UV4]](s32), [[MV]](p1) :: (store 4 into %ir.out, addrspace 1)
; GFX6: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>)
; GFX10NSA: G_STORE [[UV4]](s32), [[MV]](p1) :: (store 4 into %ir.out, addrspace 1)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
; GFX6: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32)
- ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GFX6: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10NSA-LABEL: name: image_load_3d_f32
; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
%tex = call float @llvm.amdgcn.image.load.3d.f32.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX6: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GFX6: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32)
- ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GFX6: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GFX6: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX6: $vgpr0 = COPY [[UV]](s32)
; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
; GFX10NSA: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; GFX10NSA: $vgpr0 = COPY [[UV]](s32)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
+
+define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+ ; GFX9-LABEL: name: sample_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+ ; GFX9-LABEL: name: sample_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
+ ; GFX9-LABEL: name: sample_3d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_3d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
+ ; GFX9-LABEL: name: sample_cube
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_cube
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
+ ; GFX9-LABEL: name: sample_1darray
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_1darray
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
+ ; GFX9-LABEL: name: sample_2darray
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_2darray
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
+ ; GFX9-LABEL: name: sample_c_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+ ; GFX9-LABEL: name: sample_c_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
+ ; GFX9-LABEL: name: sample_cl_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
+ ; GFX9-LABEL: name: sample_cl_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
+ ; GFX9-LABEL: name: sample_c_cl_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
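+; GFX9 concatenates the bitcast zcompare, the (s, t) pair, and the (clamp, undef) pair into one <6 x s16> vaddr; GFX10 keeps the three packed <2 x s16> registers as separate vaddr operands.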
+define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
+ ; GFX9-LABEL: name: sample_c_cl_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
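+; The f32 bias is bitcast to <2 x s16> and concatenated with the (s, undef) pair into a <4 x s16> vaddr on both targets.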
+define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
+ ; GFX9-LABEL: name: sample_b_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_b_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
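+; As above, but the bias is packed together with the full (s, t) coordinate pair into <4 x s16> on both targets.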
+define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
+ ; GFX9-LABEL: name: sample_b_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_b_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
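+; GFX9 concatenates the bitcast bias, bitcast zcompare, and (s, undef) pair into <6 x s16>; GFX10 passes the three packed <2 x s16> registers separately.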
+define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
+ ; GFX9-LABEL: name: sample_c_b_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_b_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
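+; GFX9 builds a <6 x s16> from bias, zcompare, and the (s, t) pair; GFX10 uses three separate <2 x s16> vaddr operands plus a trailing $noreg.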
+define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
+ ; GFX9-LABEL: name: sample_c_b_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_b_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
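+; The bitcast bias plus the (s, clamp) pair fit into a single <4 x s16> vaddr on both targets.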
+define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
+ ; GFX9-LABEL: name: sample_b_cl_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_b_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
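+; GFX9 concatenates bias, (s, t), and (clamp, undef) into <6 x s16>; GFX10 keeps the three packed registers as separate operands.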
+define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
+ ; GFX9-LABEL: name: sample_b_cl_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_b_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
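+; GFX9 concatenates bias, zcompare, and (s, clamp) into <6 x s16>; GFX10 passes the three packed registers separately, with the unused fourth slot as $noreg.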
+define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
+ ; GFX9-LABEL: name: sample_c_b_cl_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_b_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
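+; Largest mixed case: GFX9 builds an <8 x s16> from bias, zcompare, (s, t), and (clamp, undef); GFX10 keeps the four packed <2 x s16> registers separate.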
+define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
+ ; GFX9-LABEL: name: sample_c_b_cl_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_b_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[COPY18]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
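+; In 1D each f16 gradient and the coordinate are packed individually with undef in the high half; GFX9 concatenates the three pairs into <6 x s16>, GFX10 keeps them as separate registers.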
+define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
+ ; GFX9-LABEL: name: sample_d_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_d_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+ ; GFX9-LABEL: name: sample_d_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_d_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
+ ; GFX9-LABEL: name: sample_d_3d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
+ ; GFX9: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX9: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32)
+ ; GFX9: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_d_3d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
+ ; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX10: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[COPY28]](s32)
+ ; GFX10: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY29]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
+ ; GFX9-LABEL: name: sample_c_d_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_d_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+ ; GFX9-LABEL: name: sample_c_d_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_d_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
+ ; GFX9-LABEL: name: sample_d_cl_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_d_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+ ; GFX9-LABEL: name: sample_d_cl_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_d_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
+ ; GFX9-LABEL: name: sample_c_d_cl_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_d_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+ ; GFX9-LABEL: name: sample_c_d_cl_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
+ ; GFX9: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_d_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
+ ; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
+ ; GFX9-LABEL: name: sample_cd_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_cd_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+ ; GFX9-LABEL: name: sample_cd_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_cd_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
+ ; GFX9-LABEL: name: sample_c_cd_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_cd_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+ ; GFX9-LABEL: name: sample_c_cd_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_cd_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
+ ; GFX9-LABEL: name: sample_cd_cl_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_cd_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+ ; GFX9-LABEL: name: sample_cd_cl_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_cd_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
+ ; GFX9-LABEL: name: sample_c_cd_cl_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[CONCAT_VECTORS]](<8 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_cd_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+ ; GFX9-LABEL: name: sample_c_cd_cl_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
+ ; GFX9: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<10 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_cd_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
+ ; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
+ ; GFX9-LABEL: name: sample_l_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_l_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
+ ; GFX9-LABEL: name: sample_l_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_l_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
+ ; GFX9-LABEL: name: sample_c_l_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_l_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
+ ; GFX9-LABEL: name: sample_c_l_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_l_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+ ; GFX9-LABEL: name: sample_lz_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_lz_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+ ; GFX9-LABEL: name: sample_lz_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_lz_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
+ ; GFX9-LABEL: name: sample_c_lz_1d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_lz_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+ ; GFX9-LABEL: name: sample_c_lz_2d
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ; GFX10-LABEL: name: sample_c_lz_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
+ ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
+ ; GFX9-LABEL: name: sample_c_d_o_2darray_V1
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX9: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)
+ ; GFX9: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ; GFX10-LABEL: name: sample_c_d_o_2darray_V1
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)
+ ; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+main_body:
+ %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret float %v
+}
+
+define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
+ ; GFX9-LABEL: name: sample_c_d_o_2darray_V2
+ ; GFX9: bb.1.main_body:
+ ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX9: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX9: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX9: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX9: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX9: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX9: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)
+ ; GFX9: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
+ ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
+ ; GFX9: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+ ; GFX10-LABEL: name: sample_c_d_o_2darray_V2
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32)
+ ; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY25]](s32), [[COPY26]](s32)
+ ; GFX10: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY27]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+main_body:
+ %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <2 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
+
+define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+ ; GFX10-LABEL: name: sample_d_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+ ; GFX10-LABEL: name: sample_d_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
+ ; GFX10-LABEL: name: sample_d_3d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY24]](s32), [[COPY25]](s32)
+ ; GFX10: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY26]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
+ ; GFX10-LABEL: name: sample_c_d_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+ ; GFX10-LABEL: name: sample_c_d_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
+ ; GFX10-LABEL: name: sample_d_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+ ; GFX10-LABEL: name: sample_d_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
+ ; GFX10-LABEL: name: sample_c_d_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+ ; GFX10-LABEL: name: sample_c_d_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+ ; GFX10-LABEL: name: sample_cd_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+ ; GFX10-LABEL: name: sample_cd_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
+ ; GFX10-LABEL: name: sample_c_cd_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+ ; GFX10-LABEL: name: sample_c_cd_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
+ ; GFX10-LABEL: name: sample_cd_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+ ; GFX10-LABEL: name: sample_cd_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY19]](s32), [[COPY20]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
+ ; GFX10-LABEL: name: sample_c_cd_cl_1d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY17]](s32), [[DEF]](s32)
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY18]](s32), [[DEF]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY15]](s32), [[COPY16]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+ ; GFX10-LABEL: name: sample_c_cd_cl_2d
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
+ ; GFX10-LABEL: name: sample_c_d_o_2darray_V1
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 4 from custom "TargetCustom8")
+ ; GFX10: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
+main_body:
+ %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret float %v
+}
+
+define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
+ ; GFX10-LABEL: name: sample_c_d_o_2darray_V2
+ ; GFX10: bb.1.main_body:
+ ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32)
+ ; GFX10: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32)
+ ; GFX10: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32)
+ ; GFX10: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY21]](s32), [[COPY22]](s32)
+ ; GFX10: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32)
+ ; GFX10: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32)
+ ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY23]](s32), [[COPY24]](s32)
+ ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load 8 from custom "TargetCustom8")
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>)
+ ; GFX10: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
+main_body:
+ %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <2 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
; UNPACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8")
+ ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8")
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: image_store_f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8")
+ ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 2 into custom "TargetCustom8")
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret void
; UNPACKED: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY11]](s32), [[COPY12]](s32)
- ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
+ ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: image_store_v2f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
+ ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 4 into custom "TargetCustom8")
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret void
; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32)
- ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
+ ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: image_store_v3f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
+ ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret void
; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
; UNPACKED: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
- ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
+ ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: image_store_v4f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>)
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
- ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
+ ; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 8 into custom "TargetCustom8")
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret void
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
+
+define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_d_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff
+; GFX10-NEXT: s_lshl_b32 s12, s0, 16
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v0, v0, v3, s12
+; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s12
+; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_d_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v3, v2, v6, v3
+; GFX10-NEXT: v_and_or_b32 v10, v0, v6, v1
+; GFX10-NEXT: image_sample_d_g16 v[0:3], [v10, v3, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
+; GFX10-LABEL: sample_d_3d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v11, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX10-NEXT: s_lshl_b32 s12, s0, 16
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v0, v0, v11, v1
+; GFX10-NEXT: v_and_or_b32 v1, v2, v11, s12
+; GFX10-NEXT: v_and_or_b32 v2, v3, v11, v4
+; GFX10-NEXT: v_and_or_b32 v3, v5, v11, s12
+; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_c_d_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
+; GFX10-NEXT: s_lshl_b32 s12, s0, 16
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v1, v1, v4, s12
+; GFX10-NEXT: v_and_or_b32 v2, v2, v4, s12
+; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_c_d_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v2
+; GFX10-NEXT: v_and_or_b32 v3, v3, v7, v4
+; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_d_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: s_lshl_b32 s12, s0, 16
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v0, v0, v7, s12
+; GFX10-NEXT: v_and_or_b32 v1, v1, v7, s12
+; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_d_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v3
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v3, v2, v7, v9
+; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v1
+; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_c_d_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: s_lshl_b32 s12, s0, 16
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v1, v1, v7, s12
+; GFX10-NEXT: v_and_or_b32 v2, v2, v7, s12
+; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_c_d_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v4
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v1, v1, v8, v2
+; GFX10-NEXT: v_and_or_b32 v2, v3, v8, v10
+; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_cd_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff
+; GFX10-NEXT: s_lshl_b32 s12, s0, 16
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v0, v0, v3, s12
+; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s12
+; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_cd_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v3, v2, v6, v3
+; GFX10-NEXT: v_and_or_b32 v10, v0, v6, v1
+; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v10, v3, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_c_cd_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff
+; GFX10-NEXT: s_lshl_b32 s12, s0, 16
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v1, v1, v4, s12
+; GFX10-NEXT: v_and_or_b32 v2, v2, v4, s12
+; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_c_cd_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v2
+; GFX10-NEXT: v_and_or_b32 v3, v3, v7, v4
+; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_cd_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: s_lshl_b32 s12, s0, 16
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v0, v0, v7, s12
+; GFX10-NEXT: v_and_or_b32 v1, v1, v7, s12
+; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_cd_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v3
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v3, v2, v7, v9
+; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v1
+; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_c_cd_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: s_lshl_b32 s12, s0, 16
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v1, v1, v7, s12
+; GFX10-NEXT: v_and_or_b32 v2, v2, v7, s12
+; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_c_cd_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v4
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v1, v1, v8, v2
+; GFX10-NEXT: v_and_or_b32 v2, v3, v8, v10
+; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
+; GFX10-LABEL: sample_c_d_o_2darray_V1:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v5
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v2, v2, v9, v3
+; GFX10-NEXT: v_and_or_b32 v3, v4, v9, v11
+; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret float %v
+}
+
+define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
+; GFX10-LABEL: sample_c_d_o_2darray_V2:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v5
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_or_b32 v2, v2, v9, v3
+; GFX10-NEXT: v_and_or_b32 v3, v4, v9, v11
+; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <2 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck -check-prefix=FAST %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck -check-prefix=GREEDY %s
; Natural mapping
define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 %s) {
- ; CHECK-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
- ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
- ; CHECK: S_ENDPGM 0
+ ; FAST-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc
+ ; FAST: bb.1 (%ir-block.0):
+ ; FAST: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
+ ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; FAST: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; FAST: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+ ; FAST: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+ ; FAST: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+ ; FAST: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+ ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; FAST: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
+ ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; FAST: S_ENDPGM 0
+ ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+ ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+ ; GREEDY: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+ ; GREEDY: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+ ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
+ ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
; Copy needed for VGPR argument
define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 inreg %s) {
- ; CHECK-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
- ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
- ; CHECK: S_ENDPGM 0
+ ; FAST-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc
+ ; FAST: bb.1 (%ir-block.0):
+ ; FAST: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10
+ ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; FAST: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; FAST: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+ ; FAST: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+ ; FAST: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+ ; FAST: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+ ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
+ ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; FAST: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; FAST: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
+ ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; FAST: S_ENDPGM 0
+ ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+ ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+ ; GREEDY: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+ ; GREEDY: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+ ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
+ ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
; Waterfall loop needed for rsrc
define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
- ; CHECK-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: successors: %bb.2(0x80000000)
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
- ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
- ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
- ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %19, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
- ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
- ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
- ; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
- ; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
- ; CHECK: bb.3:
- ; CHECK: successors: %bb.4(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: bb.4:
- ; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
- ; CHECK: S_ENDPGM 0
+ ; FAST-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc
+ ; FAST: bb.1 (%ir-block.0):
+ ; FAST: successors: %bb.2(0x80000000)
+ ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; FAST: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+ ; FAST: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
+ ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
+ ; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; FAST: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
+ ; FAST: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; FAST: bb.2:
+ ; FAST: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %19, %bb.2
+ ; FAST: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
+ ; FAST: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; FAST: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; FAST: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; FAST: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
+ ; FAST: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
+ ; FAST: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
+ ; FAST: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
+ ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; FAST: bb.3:
+ ; FAST: successors: %bb.4(0x80000000)
+ ; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; FAST: bb.4:
+ ; FAST: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
+ ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; FAST: S_ENDPGM 0
+ ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: successors: %bb.2(0x80000000)
+ ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+ ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
+ ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %19, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
+ ; GREEDY: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: successors: %bb.4(0x80000000)
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
+ ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
; Waterfall loop needed for rsrc, copy needed for vaddr
define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg %s) {
- ; CHECK-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: successors: %bb.2(0x80000000)
- ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
- ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
- ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
- ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
- ; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %20, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
- ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
- ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
- ; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
- ; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
- ; CHECK: bb.3:
- ; CHECK: successors: %bb.4(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: bb.4:
- ; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
- ; CHECK: S_ENDPGM 0
+ ; FAST-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc
+ ; FAST: bb.1 (%ir-block.0):
+ ; FAST: successors: %bb.2(0x80000000)
+ ; FAST: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; FAST: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+ ; FAST: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
+ ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; FAST: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
+ ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; FAST: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
+ ; FAST: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; FAST: bb.2:
+ ; FAST: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %20, %bb.2
+ ; FAST: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
+ ; FAST: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; FAST: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; FAST: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; FAST: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
+ ; FAST: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
+ ; FAST: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
+ ; FAST: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
+ ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; FAST: bb.3:
+ ; FAST: successors: %bb.4(0x80000000)
+ ; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; FAST: bb.4:
+ ; FAST: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
+ ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; FAST: S_ENDPGM 0
+ ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: successors: %bb.2(0x80000000)
+ ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+ ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
+ ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %20, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
+ ; GREEDY: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: successors: %bb.4(0x80000000)
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
+ ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck -check-prefix=FAST %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck -check-prefix=GREEDY %s
; Natural mapping
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
- ; CHECK-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
- ; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
- ; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
- ; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
- ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
- ; CHECK: S_ENDPGM 0
+ ; FAST-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp
+ ; FAST: bb.1 (%ir-block.0):
+ ; FAST: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
+ ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; FAST: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; FAST: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+ ; FAST: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+ ; FAST: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+ ; FAST: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+ ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
+ ; FAST: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
+ ; FAST: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
+ ; FAST: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
+ ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; FAST: S_ENDPGM 0
+ ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+ ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+ ; GREEDY: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+ ; GREEDY: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+ ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
+ ; GREEDY: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
+ ; GREEDY: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
+ ; GREEDY: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
+ ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
; Copy required for VGPR input
define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float inreg %s) {
- ; CHECK-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
- ; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
- ; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
- ; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
- ; CHECK: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
- ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
- ; CHECK: S_ENDPGM 0
+ ; FAST-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp
+ ; FAST: bb.1 (%ir-block.0):
+ ; FAST: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14
+ ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; FAST: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; FAST: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+ ; FAST: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+ ; FAST: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+ ; FAST: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+ ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
+ ; FAST: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
+ ; FAST: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
+ ; FAST: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
+ ; FAST: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14
+ ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; FAST: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; FAST: S_ENDPGM 0
+ ; GREEDY-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+ ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+ ; GREEDY: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+ ; GREEDY: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+ ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
+ ; GREEDY: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
+ ; GREEDY: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
+ ; GREEDY: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
+ ; GREEDY: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; GREEDY: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
; Waterfall loop for rsrc
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsrc, <4 x i32> inreg %samp, float %s) {
- ; CHECK-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: successors: %bb.2(0x80000000)
- ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
- ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
- ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
- ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
- ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
- ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
- ; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
- ; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
- ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
- ; CHECK: bb.3:
- ; CHECK: successors: %bb.4(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: bb.4:
- ; CHECK: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
- ; CHECK: S_ENDPGM 0
+ ; FAST-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp
+ ; FAST: bb.1 (%ir-block.0):
+ ; FAST: successors: %bb.2(0x80000000)
+ ; FAST: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; FAST: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+ ; FAST: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
+ ; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; FAST: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; FAST: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; FAST: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
+ ; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; FAST: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
+ ; FAST: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; FAST: bb.2:
+ ; FAST: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
+ ; FAST: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; FAST: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; FAST: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; FAST: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; FAST: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
+ ; FAST: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
+ ; FAST: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
+ ; FAST: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
+ ; FAST: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; FAST: bb.3:
+ ; FAST: successors: %bb.4(0x80000000)
+ ; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; FAST: bb.4:
+ ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; FAST: S_ENDPGM 0
+ ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: successors: %bb.2(0x80000000)
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+ ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
+ ; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
+ ; GREEDY: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: successors: %bb.4(0x80000000)
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
; Waterfall loop for sampler
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> %samp, float %s) {
- ; CHECK-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: successors: %bb.2(0x80000000)
- ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
- ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
- ; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
- ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
- ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
- ; CHECK: bb.3:
- ; CHECK: successors: %bb.4(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: bb.4:
- ; CHECK: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
- ; CHECK: S_ENDPGM 0
+ ; FAST-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp
+ ; FAST: bb.1 (%ir-block.0):
+ ; FAST: successors: %bb.2(0x80000000)
+ ; FAST: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; FAST: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; FAST: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+ ; FAST: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+ ; FAST: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+ ; FAST: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+ ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; FAST: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; FAST: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; FAST: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; FAST: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; FAST: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
+ ; FAST: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; FAST: bb.2:
+ ; FAST: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
+ ; FAST: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; FAST: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; FAST: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; FAST: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; FAST: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; FAST: bb.3:
+ ; FAST: successors: %bb.4(0x80000000)
+ ; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; FAST: bb.4:
+ ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; FAST: S_ENDPGM 0
+ ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: successors: %bb.2(0x80000000)
+ ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+ ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+ ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+ ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
+ ; GREEDY: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
+ ; GREEDY: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
+ ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: successors: %bb.4(0x80000000)
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
; Waterfall loop for rsrc and sampler
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsrc, <4 x i32> %samp, float %s) {
- ; CHECK-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK: successors: %bb.2(0x80000000)
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
- ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
- ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
- ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
- ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
- ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9
- ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10
- ; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11
- ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
- ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
- ; CHECK: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
- ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
- ; CHECK: bb.2:
- ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
- ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
- ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
- ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
- ; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
- ; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
- ; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
- ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec
- ; CHECK: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc
- ; CHECK: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec
- ; CHECK: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec
- ; CHECK: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
- ; CHECK: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
- ; CHECK: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
- ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
- ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
- ; CHECK: bb.3:
- ; CHECK: successors: %bb.4(0x80000000)
- ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
- ; CHECK: bb.4:
- ; CHECK: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
- ; CHECK: S_ENDPGM 0
+ ; FAST-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp
+ ; FAST: bb.1 (%ir-block.0):
+ ; FAST: successors: %bb.2(0x80000000)
+ ; FAST: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
+ ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; FAST: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+ ; FAST: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
+ ; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
+ ; FAST: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9
+ ; FAST: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10
+ ; FAST: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11
+ ; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12
+ ; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; FAST: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; FAST: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
+ ; FAST: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
+ ; FAST: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; FAST: bb.2:
+ ; FAST: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
+ ; FAST: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; FAST: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; FAST: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; FAST: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; FAST: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
+ ; FAST: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
+ ; FAST: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
+ ; FAST: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
+ ; FAST: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; FAST: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec
+ ; FAST: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc
+ ; FAST: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec
+ ; FAST: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec
+ ; FAST: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
+ ; FAST: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
+ ; FAST: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
+ ; FAST: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; FAST: bb.3:
+ ; FAST: successors: %bb.4(0x80000000)
+ ; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; FAST: bb.4:
+ ; FAST: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; FAST: S_ENDPGM 0
+ ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp
+ ; GREEDY: bb.1 (%ir-block.0):
+ ; GREEDY: successors: %bb.2(0x80000000)
+ ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
+ ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+ ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+ ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
+ ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+ ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
+ ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
+ ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9
+ ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10
+ ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11
+ ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12
+ ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
+ ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
+ ; GREEDY: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
+ ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
+ ; GREEDY: bb.2:
+ ; GREEDY: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
+ ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
+ ; GREEDY: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
+ ; GREEDY: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
+ ; GREEDY: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc
+ ; GREEDY: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec
+ ; GREEDY: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec
+ ; GREEDY: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
+ ; GREEDY: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
+ ; GREEDY: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
+ ; GREEDY: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: bb.3:
+ ; GREEDY: successors: %bb.4(0x80000000)
+ ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
+ ; GREEDY: bb.4:
+ ; GREEDY: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; GREEDY: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s
+
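+; These tests cover the gfx10 G16 forms of the image sample instructions,
+; where the gradient operands are 16-bit halves while the coordinates stay
+; 32-bit; the checks also verify the MC encodings of the *_g16 opcodes.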
+define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_d_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
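+; For 2D and higher dimensions, each pair of gradient halves is packed into a
+; single dword: the low half is masked with 0xffff and the high half is merged
+; in with v_lshl_or_b32, so the G16 form needs half as many gradient VGPRs as
+; 32-bit gradients would.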
+define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_d_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; encoding: [0xff,0x02,0x0e,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 ; encoding: [0x07,0x05,0x04,0x36]
+; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; encoding: [0x07,0x01,0x00,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; encoding: [0x02,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0 ; encoding: [0x03,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04]
+; GFX10-NEXT: image_sample_d_g16 v[0:3], [v3, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0b,0x0f,0x88,0xf0,0x03,0x00,0x40,0x00,0x02,0x04,0x05,0x00]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
+; GFX10-LABEL: sample_d_3d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; encoding: [0xff,0x02,0x12,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v3, v9, v3 ; encoding: [0x09,0x07,0x06,0x36]
+; GFX10-NEXT: v_and_b32_e32 v0, v9, v0 ; encoding: [0x09,0x01,0x00,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; encoding: [0x00,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04]
+; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x15,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00,0x02,0x03,0x05,0x06,0x07,0x08,0x00,0x00]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_c_d_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_c_d_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v10, 0xffff ; encoding: [0xff,0x02,0x14,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v3, v10, v3 ; encoding: [0x0a,0x07,0x06,0x36]
+; GFX10-NEXT: v_and_b32_e32 v1, v10, v1 ; encoding: [0x0a,0x03,0x02,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 ; encoding: [0x01,0x00,0x6f,0xd7,0x02,0x21,0x05,0x04]
+; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0b,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00,0x01,0x03,0x05,0x06]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_d_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_d_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; encoding: [0xff,0x02,0x0e,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; encoding: [0x07,0x01,0x00,0x36]
+; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 ; encoding: [0x07,0x05,0x04,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; encoding: [0x00,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 ; encoding: [0x03,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04]
+; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0b,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00,0x03,0x04,0x05,0x06]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_c_d_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_c_d_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff ; encoding: [0xff,0x02,0x10,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 ; encoding: [0x08,0x07,0x06,0x36]
+; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 ; encoding: [0x08,0x03,0x02,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 ; encoding: [0x01,0x00,0x6f,0xd7,0x02,0x21,0x05,0x04]
+; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0d,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00,0x01,0x03,0x05,0x06,0x07,0x00,0x00,0x00]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_cd_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_cd_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; encoding: [0xff,0x02,0x0e,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 ; encoding: [0x07,0x05,0x04,0x36]
+; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; encoding: [0x07,0x01,0x00,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; encoding: [0x02,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0 ; encoding: [0x03,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04]
+; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v3, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0b,0x0f,0xa0,0xf1,0x03,0x00,0x40,0x00,0x02,0x04,0x05,0x00]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_c_cd_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_c_cd_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v10, 0xffff ; encoding: [0xff,0x02,0x14,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v3, v10, v3 ; encoding: [0x0a,0x07,0x06,0x36]
+; GFX10-NEXT: v_and_b32_e32 v1, v10, v1 ; encoding: [0x0a,0x03,0x02,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 ; encoding: [0x01,0x00,0x6f,0xd7,0x02,0x21,0x05,0x04]
+; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0b,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00,0x01,0x03,0x05,0x06]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_cd_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_cd_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; encoding: [0xff,0x02,0x0e,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 ; encoding: [0x07,0x01,0x00,0x36]
+; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 ; encoding: [0x07,0x05,0x04,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; encoding: [0x00,0x00,0x6f,0xd7,0x01,0x21,0x01,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2 ; encoding: [0x03,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04]
+; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0b,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00,0x03,0x04,0x05,0x06]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_c_cd_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_c_cd_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff ; encoding: [0xff,0x02,0x10,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 ; encoding: [0x08,0x07,0x06,0x36]
+; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 ; encoding: [0x08,0x03,0x02,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 ; encoding: [0x03,0x00,0x6f,0xd7,0x04,0x21,0x0d,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 ; encoding: [0x01,0x00,0x6f,0xd7,0x02,0x21,0x05,0x04]
+; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0d,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00,0x01,0x03,0x05,0x06,0x07,0x00,0x00,0x00]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
+; GFX10-LABEL: sample_c_d_o_2darray_V1:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; encoding: [0xff,0x02,0x12,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 ; encoding: [0x09,0x09,0x08,0x36]
+; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 ; encoding: [0x09,0x05,0x04,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; encoding: [0x04,0x00,0x6f,0xd7,0x05,0x21,0x11,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; encoding: [0x02,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04]
+; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2d,0x04,0xe8,0xf0,0x00,0x00,0x40,0x00,0x01,0x02,0x04,0x06,0x07,0x08,0x00,0x00]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret float %v
+}
+
+define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
+; GFX10-LABEL: sample_c_d_o_2darray_V2:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff ; encoding: [0xff,0x02,0x12,0x7e,0xff,0xff,0x00,0x00]
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 ; encoding: [0x09,0x09,0x08,0x36]
+; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 ; encoding: [0x09,0x05,0x04,0x36]
+; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 ; encoding: [0x04,0x00,0x6f,0xd7,0x05,0x21,0x11,0x04]
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 ; encoding: [0x02,0x00,0x6f,0xd7,0x03,0x21,0x09,0x04]
+; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2d,0x06,0xe8,0xf0,0x00,0x00,0x40,0x00,0x01,0x02,0x04,0x06,0x07,0x08,0x00,0x00]
+; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <2 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
+
+define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_d_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_d_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v2, v7, v2
+; GFX10-NEXT: v_and_b32_e32 v0, v7, v0
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0
+; GFX10-NEXT: image_sample_d_g16 v[0:3], [v3, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
+; GFX10-LABEL: sample_d_3d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v3, v9, v3
+; GFX10-NEXT: v_and_b32_e32 v0, v9, v0
+; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_c_d_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_c_d_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v10, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v3, v10, v3
+; GFX10-NEXT: v_and_b32_e32 v1, v10, v1
+; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_d_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_d_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v0, v7, v0
+; GFX10-NEXT: v_and_b32_e32 v2, v7, v2
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2
+; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_c_d_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_c_d_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v3, v8, v3
+; GFX10-NEXT: v_and_b32_e32 v1, v8, v1
+; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_cd_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_cd_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v2, v7, v2
+; GFX10-NEXT: v_and_b32_e32 v0, v7, v0
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT: v_lshl_or_b32 v3, v1, 16, v0
+; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v3, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
+; GFX10-LABEL: sample_c_cd_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
+; GFX10-LABEL: sample_c_cd_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v10, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v3, v10, v3
+; GFX10-NEXT: v_and_b32_e32 v1, v10, v1
+; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_cd_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_cd_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v0, v7, v0
+; GFX10-NEXT: v_and_b32_e32 v2, v7, v2
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2
+; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
+; GFX10-LABEL: sample_c_cd_cl_1d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
+; GFX10-LABEL: sample_c_cd_cl_2d:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v3, v8, v3
+; GFX10-NEXT: v_and_b32_e32 v1, v8, v1
+; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
+; GFX10-LABEL: sample_c_d_o_2darray_V1:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v4, v9, v4
+; GFX10-NEXT: v_and_b32_e32 v2, v9, v2
+; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret float %v
+}
+
+define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
+; GFX10-LABEL: sample_c_d_o_2darray_V2:
+; GFX10: ; %bb.0: ; %main_body
+; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_and_b32_e32 v4, v9, v4
+; GFX10-NEXT: v_and_b32_e32 v2, v9, v2
+; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
+main_body:
+ %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <2 x float> %v
+}
+
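+; Intrinsic declarations below use half-typed gradients with float coordinates,
+; which is the combination that selects the _g16 MIMG instruction variants
+; checked above.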
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
--- /dev/null
+; RUN: not --crash llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not --crash llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+; Make sure selecting an image sample intrinsic with 16-bit gradients does not
+; assert on targets without the g16 feature and instead reports a selection error.
+
+; ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.image.sample.d.1d
+
+define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+
+attributes #0 = { nounwind readonly }