InstCombine/AMDGPU: Add dimension-aware image intrinsics to SimplifyDemanded

author Nicolai Haehnle <nhaehnle@gmail.com>

Thu, 21 Jun 2018 13:37:31 +0000 (13:37 +0000)

committer Nicolai Haehnle <nhaehnle@gmail.com>

Thu, 21 Jun 2018 13:37:31 +0000 (13:37 +0000)
author Nicolai Haehnle <nhaehnle@gmail.com>
Thu, 21 Jun 2018 13:37:31 +0000 (13:37 +0000)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Thu, 21 Jun 2018 13:37:31 +0000 (13:37 +0000)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td

index 44a9fe8..aef7c68 100644 (file)
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -803,9 +803,15 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
                             !if(P_.IsAtomic, 0, 1)), 1> {
    AMDGPUDimProfile P = P_;
  
+  AMDGPUImageDimIntrinsic Intr = !cast<AMDGPUImageDimIntrinsic>(NAME);
+
    let TargetPrefix = "amdgcn";
  }
  
+// Marker class for intrinsics with a DMask that determines the returned
+// channels.
+class AMDGPUImageDMaskIntrinsic;
+
  defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
  
    //////////////////////////////////////////////////////////////////////////
@@ -839,10 +845,14 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
      }
    }
  
-  defm int_amdgcn_image_load : AMDGPUImageDimIntrinsicsAll<
-              "LOAD", [llvm_anyfloat_ty], [], [IntrReadMem], [SDNPMemOperand]>;
-  defm int_amdgcn_image_load_mip : AMDGPUImageDimIntrinsicsNoMsaa<
-              "LOAD_MIP", [llvm_anyfloat_ty], [], [IntrReadMem], [SDNPMemOperand], 1>;
+  defm int_amdgcn_image_load
+    : AMDGPUImageDimIntrinsicsAll<"LOAD", [llvm_anyfloat_ty], [], [IntrReadMem],
+                                  [SDNPMemOperand]>,
+      AMDGPUImageDMaskIntrinsic;
+  defm int_amdgcn_image_load_mip
+    : AMDGPUImageDimIntrinsicsNoMsaa<"LOAD_MIP", [llvm_anyfloat_ty], [],
+                                     [IntrReadMem], [SDNPMemOperand], 1>,
+      AMDGPUImageDMaskIntrinsic;
  
    defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
                "STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
@@ -866,18 +876,22 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
    }
  
    foreach sample = AMDGPUSampleVariants in {
-    defm int_amdgcn_image_sample # sample.LowerCaseMod :
-        AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>;
+    defm int_amdgcn_image_sample # sample.LowerCaseMod
+      : AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>,
+        AMDGPUImageDMaskIntrinsic;
    }
  
-  defm int_amdgcn_image_getlod : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>;
+  defm int_amdgcn_image_getlod
+    : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>,
+      AMDGPUImageDMaskIntrinsic;
  
    //////////////////////////////////////////////////////////////////////////
    // getresinfo intrinsics
    //////////////////////////////////////////////////////////////////////////
    foreach dim = AMDGPUDims.All in {
      def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)
-      : AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>;
+      : AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>,
+        AMDGPUImageDMaskIntrinsic;
    }
  
    //////////////////////////////////////////////////////////////////////////
diff --git a/llvm/lib/Transforms/InstCombine/CMakeLists.txt b/llvm/lib/Transforms/InstCombine/CMakeLists.txt

index 5cbe804..8a3a58e 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/llvm/lib/Transforms/InstCombine/CMakeLists.txt
@@ -1,3 +1,7 @@
+set(LLVM_TARGET_DEFINITIONS InstCombineTables.td)
+tablegen(LLVM InstCombineTables.inc -gen-searchable-tables)
+add_public_tablegen_target(InstCombineTableGen)
+
  add_llvm_library(LLVMInstCombine
    InstructionCombining.cpp
    InstCombineAddSub.cpp
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h

index ba99035..2388852 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -706,6 +706,10 @@ private:
    /// demanded bits.
    bool SimplifyDemandedInstructionBits(Instruction &Inst);
  
+  Value *simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
+                                               APInt DemandedElts,
+                                               int DmaskIdx = -1);
+
    Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                      APInt &UndefElts, unsigned Depth = 0);
  
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

index 8190c34..133561b 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -23,6 +23,17 @@ using namespace llvm::PatternMatch;
  
  #define DEBUG_TYPE "instcombine"
  
+namespace {
+
+struct AMDGPUImageDMaskIntrinsic {
+  unsigned Intr;
+};
+
+#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
+#include "InstCombineTables.inc"
+
+} // end anonymous namespace
+
  /// Check to see if the specified operand of the specified instruction is a
  /// constant integer. If so, check to see if there are any bits set in the
  /// constant that are not demanded. If so, shrink the constant and return true.
@@ -909,6 +920,110 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
    return nullptr;
  }
  
+/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
+Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
+                                                           APInt DemandedElts,
+                                                           int DMaskIdx) {
+  unsigned VWidth = II->getType()->getVectorNumElements();
+  if (VWidth == 1)
+    return nullptr;
+
+  ConstantInt *NewDMask = nullptr;
+
+  if (DMaskIdx < 0) {
+    // Pretend that a prefix of elements is demanded to simplify the code
+    // below.
+    DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
+  } else {
+    ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
+    if (!DMask)
+      return nullptr; // non-constant dmask is not supported by codegen
+
+    unsigned DMaskVal = DMask->getZExtValue() & 0xf;
+
+    // Mask off values that are undefined because the dmask doesn't cover them
+    DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
+
+    unsigned NewDMaskVal = 0;
+    unsigned OrigLoadIdx = 0;
+    for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
+      const unsigned Bit = 1 << SrcIdx;
+      if (!!(DMaskVal & Bit)) {
+        if (!!(DemandedElts & (1 << OrigLoadIdx)))
+          NewDMaskVal |= Bit;
+        OrigLoadIdx++;
+      }
+    }
+
+    if (DMaskVal != NewDMaskVal)
+      NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
+  }
+
+  // TODO: Handle 3 vectors when supported in code gen.
+  unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countPopulation());
+  if (!NewNumElts)
+    return UndefValue::get(II->getType());
+
+  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
+    if (NewDMask)
+      II->setArgOperand(DMaskIdx, NewDMask);
+    return nullptr;
+  }
+
+  // Determine the overload types of the original intrinsic.
+  auto IID = II->getIntrinsicID();
+  SmallVector<Intrinsic::IITDescriptor, 16> Table;
+  getIntrinsicInfoTableEntries(IID, Table);
+  ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+
+  FunctionType *FTy = II->getCalledFunction()->getFunctionType();
+  SmallVector<Type *, 6> OverloadTys;
+  Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, OverloadTys);
+  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+    Intrinsic::matchIntrinsicType(FTy->getParamType(i), TableRef, OverloadTys);
+
+  // Get the new return type overload of the intrinsic.
+  Module *M = II->getParent()->getParent()->getParent();
+  Type *EltTy = II->getType()->getVectorElementType();
+  Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);
+
+  OverloadTys[0] = NewTy;
+  Function *NewIntrin = Intrinsic::getDeclaration(M, IID, OverloadTys);
+
+  SmallVector<Value *, 16> Args;
+  for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
+    Args.push_back(II->getArgOperand(I));
+
+  if (NewDMask)
+    Args[DMaskIdx] = NewDMask;
+
+  IRBuilderBase::InsertPointGuard Guard(Builder);
+  Builder.SetInsertPoint(II);
+
+  CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
+  NewCall->takeName(II);
+  NewCall->copyMetadata(*II);
+
+  if (NewNumElts == 1) {
+    return Builder.CreateInsertElement(UndefValue::get(II->getType()), NewCall,
+                                       DemandedElts.countTrailingZeros());
+  }
+
+  SmallVector<uint32_t, 8> EltMask;
+  unsigned NewLoadIdx = 0;
+  for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
+    if (!!(DemandedElts & (1 << OrigLoadIdx)))
+      EltMask.push_back(NewLoadIdx++);
+    else
+      EltMask.push_back(NewNumElts);
+  }
+
+  Value *Shuffle =
+      Builder.CreateShuffleVector(NewCall, UndefValue::get(NewTy), EltMask);
+
+  return Shuffle;
+}
+
  /// The specified value produces a vector with any number of elements.
  /// DemandedElts contains the set of elements that are actually used by the
  /// caller. This method analyzes which elements of the operand are undef and
@@ -1267,8 +1382,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
      IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
      if (!II) break;
      switch (II->getIntrinsicID()) {
-    default: break;
-
      case Intrinsic::x86_xop_vfrcz_ss:
      case Intrinsic::x86_xop_vfrcz_sd:
        // The instructions for these intrinsics are speced to zero upper bits not
@@ -1582,79 +1695,17 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
      case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
  
      case Intrinsic::amdgcn_image_getlod: {
-      if (VWidth == 1 || !DemandedElts.isMask())
-        return nullptr;
-
-      // TODO: Handle 3 vectors when supported in code gen.
-      unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countTrailingOnes());
-      if (NewNumElts == VWidth)
-        return nullptr;
-
-      Module *M = II->getParent()->getParent()->getParent();
-      Type *EltTy = V->getType()->getVectorElementType();
-
-      Type *NewTy = (NewNumElts == 1) ? EltTy :
-        VectorType::get(EltTy, NewNumElts);
-
        auto IID = II->getIntrinsicID();
-
        bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load ||
                        IID == Intrinsic::amdgcn_buffer_load_format;
+      return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts,
+                                                   IsBuffer ? -1 : 3);
+    }
+    default: {
+      if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
+        return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
  
-      Function *NewIntrin = IsBuffer ?
-        Intrinsic::getDeclaration(M, IID, NewTy) :
-        // Samplers have 3 mangled types.
-        Intrinsic::getDeclaration(M, IID,
-                                  { NewTy, II->getArgOperand(0)->getType(),
-                                      II->getArgOperand(1)->getType()});
-
-      SmallVector<Value *, 5> Args;
-      for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
-        Args.push_back(II->getArgOperand(I));
-
-      IRBuilderBase::InsertPointGuard Guard(Builder);
-      Builder.SetInsertPoint(II);
-
-      CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
-      NewCall->takeName(II);
-      NewCall->copyMetadata(*II);
-
-      if (!IsBuffer) {
-        ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3));
-        if (DMask) {
-          unsigned DMaskVal = DMask->getZExtValue() & 0xf;
-
-          unsigned PopCnt = 0;
-          unsigned NewDMask = 0;
-          for (unsigned I = 0; I < 4; ++I) {
-            const unsigned Bit = 1 << I;
-            if (!!(DMaskVal & Bit)) {
-              if (++PopCnt > NewNumElts)
-                break;
-
-              NewDMask |= Bit;
-            }
-          }
-
-          NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask));
-        }
-      }
-
-
-      if (NewNumElts == 1) {
-        return Builder.CreateInsertElement(UndefValue::get(V->getType()),
-                                           NewCall, static_cast<uint64_t>(0));
-      }
-
-      SmallVector<uint32_t, 8> EltMask;
-      for (unsigned I = 0; I < VWidth; ++I)
-        EltMask.push_back(I);
-
-      Value *Shuffle = Builder.CreateShuffleVector(
-        NewCall, UndefValue::get(NewTy), EltMask);
-
-      MadeChange = true;
-      return Shuffle;
+      break;
      }
      }
      break;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineTables.td b/llvm/lib/Transforms/InstCombine/InstCombineTables.td

new file mode 100644 (file)

index 0000000..98b2adc
--- /dev/null
+++ b/llvm/lib/Transforms/InstCombine/InstCombineTables.td
@@ -0,0 +1,11 @@
+include "llvm/TableGen/SearchableTable.td"
+include "llvm/IR/Intrinsics.td"
+
+def AMDGPUImageDMaskIntrinsicTable : GenericTable {
+  let FilterClass = "AMDGPUImageDMaskIntrinsic";
+  let Fields = ["Intr"];
+
+  let PrimaryKey = ["Intr"];
+  let PrimaryKeyName = "getAMDGPUImageDMaskIntrinsic";
+  let PrimaryKeyEarlyOut = 1;
+}
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll

index 0c4842c..2ca3e32 100644 (file)
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll
@@ -65,8 +65,8 @@ define amdgpu_ps float @extract_elt0_buffer_load_v4f32(<4 x i32> inreg %rsrc, i3
  }
  
  ; CHECK-LABEL: @extract_elt1_buffer_load_v4f32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 1
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
  define amdgpu_ps float @extract_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
    %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
@@ -180,8 +180,8 @@ define amdgpu_ps float @extract_elt0_buffer_load_v3f32(<4 x i32> inreg %rsrc, i3
  }
  
  ; CHECK-LABEL: @extract_elt1_buffer_load_v3f32(
-; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
-; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 1
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
+; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
  ; CHECK-NEXT: ret float %elt1
  define amdgpu_ps float @extract_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
    %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
@@ -320,842 +320,786 @@ declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i
  ; llvm.amdgcn.image.sample
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v4f32_v4i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_image_sample_v4f32_v2f32_v4i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 %dmask, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 %dmask, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0
+; CHECK-NEXT: ret float %elt0
+define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; FIXME: Should really fold to undef
-; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 0, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
+; CHECK-NEXT: ret float undef
+define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1darray.f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; FIXME: Should really fold to undef
-; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 2, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 2, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; FIXME: Should really fold to undef
-; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 4, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 4, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; FIXME: Should really fold to undef
-; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 8, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 8, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 9, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: %1 = insertelement <2 x float> undef, float %data, i32 0
+; CHECK-NEXT: ret <2 x float> %1
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
-; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
-; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
-; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret <2 x float> %data
-define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
    ret <2 x float> %shuf
  }
  
-; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: %1 = insertelement <3 x float> undef, float %data, i32 0
+; CHECK-NEXT: ret <3 x float> %1
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x float> %shuf
+}
+
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %shuf
  }
  
-; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: %shuf = shufflevector <2 x float> %data, <2 x float> undef, <3 x i32> <i32 0, i32 1, i32 undef>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %shuf
  }
  
-; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %shuf
  }
  
-; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ; CHECK-NEXT: ret <3 x float> %shuf
-define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 7, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %shuf
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.cl
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_cl_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32(i32 2, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
+define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %elt0 = extractelement <4 x float> %data, i32 1
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.d
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_d_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32(i32 4, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_d_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
+define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %elt0 = extractelement <4 x float> %data, i32 2
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.d.cl
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_d_cl_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32(i32 8, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_d_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
+define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %elt0 = extractelement <4 x float> %data, i32 3
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.l
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_l_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.1d.f32.f32(i32 4, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
+define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %elt0 = extractelement <2 x float> %data, i32 1
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.b
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_b_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32(i32 8, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
+define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %elt0 = extractelement <4 x float> %data, i32 1
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.b.cl
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_b_cl_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
+; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32(i32 12, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  ret <2 x float> %shuf
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.lz
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_lz_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
+; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(
+; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32(i32 10, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret <2 x float> %data
+define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 3>
+  ret <2 x float> %shuf
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.cd
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_cd_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_cd_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
+; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 14, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+
+; TODO: This should be combined with the following shufflevector
+; CHECK-NEXT: %1 = shufflevector <4 x float> %data, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
+
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %1, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x float> %shuf
+define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  ret <3 x float> %shuf
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.cd.cl
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_cd_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
+; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(
+; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret half %data
+define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %elt0 = extractelement <4 x half> %data, i32 0
+  ret half %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.1d.f32.f32(i32 1, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v4f32_v4i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-; CHECK-LABEL: @extract_elt0_image_sample_c_v4f32_v2f32_v4i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.cl
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_cl_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32(i32 1, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.d
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_d_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_d_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.d.cl
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.l
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_l_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32(i32 1, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.b
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_b_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.b.cl
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.lz
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_lz_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32(i32 1, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.cd
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_cd_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_cd_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.cd.cl
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32(i32 1, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v4f32_v4i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.1d.f32.f32(i32 1, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_image_sample_o_v4f32_v2f32_v4i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.o.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.sample.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32(i32 1, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.d.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_d_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_d_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.d.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.l.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_l_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32(i32 1, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.b.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_b_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.b.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.lz.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_lz_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32(i32 1, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.cd.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_cd_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_cd_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.cd.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v4f32_v4i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
-; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-; CHECK-LABEL: @extract_elt0_image_sample_c_o_v4f32_v2f32_v4i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.d.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_d_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.d.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.l.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.b.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.b.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.lz.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32(i32 1, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.cd.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.sample.c.cd.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.o.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32(i32 1, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4
@@ -1163,342 +1107,313 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.v4f32.v4f32.v8i32(<4 x f
  
  ; Don't handle gather4*
  
-; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v4f32_v8i32(
-; CHECK: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i3
-define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32(
+; CHECK: %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v4f32_v4i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-; CHECK-LABEL: @extract_elt0_image_gather4_v4f32_v2f32_v4i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.cl
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_cl_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.l
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_l_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_l_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.b
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_b_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_b_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.b.cl
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_b_cl_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.lz
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_lz_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_lz_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v4f32_v4i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-; CHECK-LABEL: @extract_elt0_image_gather4_o_v4f32_v2f32_v4i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.l.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_l_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.b.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_b_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.b.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.lz.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.c.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v4f32_v4i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-; CHECK-LABEL: @extract_elt0_image_gather4_c_o_v4f32_v2f32_v4i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_c_o_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-  %elt0 = extractelement <4 x float> %data, i32 0
-  ret float %elt0
-}
-
-declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.o.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.c.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.c.l.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.c.b.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.c.b.cl.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.gather4.c.lz.o
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
-define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %gather4r, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(
+; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
  ; --------------------------------------------------------------------
  ; llvm.amdgcn.image.getlod
  ; --------------------------------------------------------------------
  
-; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v4f32_v8i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.1d.f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+; CHECK-NEXT: ret float %data
+define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
+  %elt0 = extractelement <4 x float> %data, i32 0
+  ret float %elt0
+}
+
+declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.load
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v4f32_v8i32(<4 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v4f32_v4i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.load.mip
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.load.mip.1d.f32.i32(i32 1, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v4f32_v4i32(<4 x float> %vaddr, <4 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v4i32(<4 x float> %vaddr, <4 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-; CHECK-LABEL: @extract_elt0_image_getlod_v4f32_v2f32_v4i32(
-; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getlod.f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false, i1 false)
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.getresinfo
+; --------------------------------------------------------------------
+
+; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32(
+; CHECK-NEXT: %data = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 1, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
  ; CHECK-NEXT: ret float %data
-define amdgpu_ps float @extract_elt0_image_getlod_v4f32_v2f32_v4i32(<2 x float> %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
-  %data = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 {
+  %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0)
    %elt0 = extractelement <4 x float> %data, i32 0
    ret float %elt0
  }
  
-declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readonly }
author	Nicolai Haehnle <nhaehnle@gmail.com>
	Thu, 21 Jun 2018 13:37:31 +0000 (13:37 +0000)
committer	Nicolai Haehnle <nhaehnle@gmail.com>
	Thu, 21 Jun 2018 13:37:31 +0000 (13:37 +0000)
llvm/include/llvm/IR/IntrinsicsAMDGPU.td		patch \| blob \| history
llvm/lib/Transforms/InstCombine/CMakeLists.txt		patch \| blob \| history
llvm/lib/Transforms/InstCombine/InstCombineInternal.h		patch \| blob \| history
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp		patch \| blob \| history
llvm/lib/Transforms/InstCombine/InstCombineTables.td	[new file with mode: 0644]	patch \| blob
llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll		patch \| blob \| history