From 2f5a73820c1ef6fbc687608deee56fa9c8c711b7 Mon Sep 17 00:00:00 2001
From: Nicolai Haehnle <nhaehnle@gmail.com>
Date: Wed, 4 Apr 2018 10:58:54 +0000
Subject: [PATCH] AMDGPU: Dimension-aware image intrinsics

Summary:
These new image intrinsics contain the texture type as part of
their name and have each component of the address/coordinate as
individual parameters.

This is a preparatory step for implementing the A16 feature, where
coordinates are passed as half-floats or -ints, but the Z compare
value and texel offsets are still full dwords, making it difficult
or impossible to distinguish between A16 on or off in the old-style
intrinsics.

Additionally, these intrinsics pass the 'texfailpolicy' and
'cachectrl' as i32 bit fields to reduce operand clutter and allow
for future extensibility.

v2:
- gather4 supports 2darray images
- fix a bug with 1D images on SI

Change-Id: I099f309e0a394082a5901ea196c3967afb867f04

Reviewers: arsenm, rampitec, b-sumner

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D44939

llvm-svn: 329166
---
 llvm/include/llvm/IR/Intrinsics.td                 |  14 +-
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td           | 378 +++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp         |   3 +
 llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h           |   6 +
 llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td   |  23 +-
 llvm/lib/Target/AMDGPU/MIMGInstructions.td         | 195 +++++++++
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp          |  41 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.td              |   6 +
 .../AMDGPU/llvm.amdgcn.image.atomic.ll             |  10 +
 .../CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll | 216 ++++++++++
 .../CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll    | 115 ++++++
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll  | 420 +++++++++++++++++++
 .../AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll    |  19 +
 .../AMDGPU/llvm.amdgcn.image.gather4.dim.ll        | 160 +++++++
 .../AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll     |  39 ++
 .../CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll | 459 +++++++++++++++++++++
 16 files changed, 2096 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 4361c25..2d6dd2e 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -117,6 +117,7 @@ def IntrHasSideEffects : IntrinsicProperty;
 
 class LLVMType<ValueType vt> {
   ValueType VT = vt;
+  int isAny = 0;
 }
 
 class LLVMQualPointerType<LLVMType elty, int addrspace>
@@ -131,6 +132,8 @@ class LLVMPointerType<LLVMType elty>
 class LLVMAnyPointerType<LLVMType elty>
   : LLVMType<iPTRAny>{
   LLVMType ElTy = elty;
+
+  let isAny = 1;
 }
 
 // Match the type of another intrinsic parameter.  Number is an index into the
@@ -163,10 +166,12 @@ class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
 class LLVMHalfElementsVectorType<int num> : LLVMMatchType<num>;
 
 def llvm_void_ty       : LLVMType<isVoid>;
-def llvm_any_ty        : LLVMType<Any>;
-def llvm_anyint_ty     : LLVMType<iAny>;
-def llvm_anyfloat_ty   : LLVMType<fAny>;
-def llvm_anyvector_ty  : LLVMType<vAny>;
+let isAny = 1 in {
+  def llvm_any_ty        : LLVMType<Any>;
+  def llvm_anyint_ty     : LLVMType<iAny>;
+  def llvm_anyfloat_ty   : LLVMType<fAny>;
+  def llvm_anyvector_ty  : LLVMType<vAny>;
+}
 def llvm_i1_ty         : LLVMType<i1>;
 def llvm_i8_ty         : LLVMType<i8>;
 def llvm_i16_ty        : LLVMType<i16>;
@@ -249,7 +254,6 @@ def llvm_v8f64_ty      : LLVMType<v8f64>;    //  8 x double
 
 def llvm_vararg_ty     : LLVMType<isVoid>;   // this means vararg here
 
-
 //===----------------------------------------------------------------------===//
 // Intrinsic Definitions.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 6ce3c86..6cf5c7f6 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -506,6 +506,384 @@ def int_amdgcn_image_atomic_cmpswap : Intrinsic <
 
 } // defset AMDGPUImageIntrinsics
 
+} // TargetPrefix = "amdgcn"
+
+// New-style image intrinsics
+
+//////////////////////////////////////////////////////////////////////////
+// Dimension-aware image intrinsics framework
+//////////////////////////////////////////////////////////////////////////
+
+// Helper class to represent (type, name) combinations of arguments. The
+// argument names are explanatory and used as DAG operand names for codegen
+// pattern matching.
+class AMDGPUArg<LLVMType ty, string name> {
+  LLVMType Type = ty;
+  string Name = name;
+}
+
+// Return [AMDGPUArg<basety, names[0]>, AMDGPUArg<LLVMMatchType<0>, names[1]>, ...]
+class makeArgList<list<string> names, LLVMType basety> {
+  list<AMDGPUArg> ret =
+    !listconcat([AMDGPUArg<basety, names[0]>],
+                !foreach(name, !tail(names), AMDGPUArg<LLVMMatchType<0>, name>));
+}
+
+// Return arglist, with LLVMMatchType's references shifted by 'shift'.
+class arglistmatchshift<list<AMDGPUArg> arglist, int shift> {
+  list<AMDGPUArg> ret =
+    !foreach(arg, arglist,
+             !if(!isa<LLVMMatchType>(arg.Type),
+                 AMDGPUArg<LLVMMatchType<!add(!cast<LLVMMatchType>(arg.Type).Number, shift)>,
+                           arg.Name>,
+                 arg));
+}
+
+// Return the concatenation of the given arglists. LLVMMatchType's are adjusted
+// accordingly, and shifted by an additional 'shift'.
+class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> {
+  list<AMDGPUArg> ret =
+    !foldl([]<AMDGPUArg>, arglists, lhs, rhs,
+           !listconcat(
+             lhs,
+             arglistmatchshift<rhs,
+                               !add(shift, !foldl(0, lhs, a, b,
+                                                  !add(a, b.Type.isAny)))>.ret));
+}
+
+// Represent texture/image types / dimensionality.
+class AMDGPUDimProps<string name, list<string> coord_names, list<string> slice_names> {
+  string Name = name; // e.g. "2darraymsaa"
+  bit DA = 0; // DA bit in MIMG encoding
+
+  list<AMDGPUArg> CoordSliceArgs =
+    makeArgList<!listconcat(coord_names, slice_names), llvm_anyfloat_ty>.ret;
+  list<AMDGPUArg> CoordSliceIntArgs =
+    makeArgList<!listconcat(coord_names, slice_names), llvm_anyint_ty>.ret;
+  list<AMDGPUArg> GradientArgs =
+    makeArgList<!listconcat(!foreach(name, coord_names, "d" # name # "dh"),
+                            !foreach(name, coord_names, "d" # name # "dv")),
+                llvm_anyfloat_ty>.ret;
+}
+
+def AMDGPUDim1D : AMDGPUDimProps<"1d", ["s"], []>;
+def AMDGPUDim2D : AMDGPUDimProps<"2d", ["s", "t"], []>;
+def AMDGPUDim3D : AMDGPUDimProps<"3d", ["s", "t", "r"], []>;
+let DA = 1 in {
+  def AMDGPUDimCube : AMDGPUDimProps<"cube", ["s", "t"], ["face"]>;
+  def AMDGPUDim1DArray : AMDGPUDimProps<"1darray", ["s"], ["slice"]>;
+  def AMDGPUDim2DArray : AMDGPUDimProps<"2darray", ["s", "t"], ["slice"]>;
+}
+def AMDGPUDim2DMsaa : AMDGPUDimProps<"2dmsaa", ["s", "t"], ["fragid"]>;
+let DA = 1 in {
+  def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<"2darraymsaa", ["s", "t"], ["slice", "fragid"]>;
+}
+
+def AMDGPUDims {
+  list<AMDGPUDimProps> NoMsaa = [AMDGPUDim1D, AMDGPUDim2D, AMDGPUDim3D,
+                                 AMDGPUDimCube, AMDGPUDim1DArray,
+                                 AMDGPUDim2DArray];
+  list<AMDGPUDimProps> Msaa = [AMDGPUDim2DMsaa, AMDGPUDim2DArrayMsaa];
+  list<AMDGPUDimProps> All = !listconcat(NoMsaa, Msaa);
+}
+
+// Represent sample variants, i.e. _C, _O, _B, ... and combinations thereof.
+class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr> {
+  string UpperCaseMod = ucmod;
+  string LowerCaseMod = lcmod;
+
+  // {offset} {bias} {z-compare}
+  list<AMDGPUArg> ExtraAddrArgs = extra_addr;
+  bit Gradients = 0;
+
+  // Name of the {lod} or {clamp} argument that is appended to the coordinates,
+  // if any.
+  string LodOrClamp = "";
+}
+
+// AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
+// AMDGPUSampleVariantsNoGradients: variants supported by IMAGE_GATHER4
+defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
+  multiclass AMDGPUSampleHelper_Offset<string ucmod, string lcmod,
+                                       list<AMDGPUArg> extra_addr> {
+    def NAME#lcmod : AMDGPUSampleVariant<ucmod, lcmod, extra_addr>;
+    def NAME#lcmod#_o : AMDGPUSampleVariant<
+        ucmod#"_O", lcmod#"_o", !listconcat([AMDGPUArg<llvm_i32_ty, "offset">], extra_addr)>;
+  }
+
+  multiclass AMDGPUSampleHelper_Compare<string ucmod, string lcmod,
+                                        list<AMDGPUArg> extra_addr> {
+    defm NAME : AMDGPUSampleHelper_Offset<ucmod, lcmod, extra_addr>;
+    defm NAME : AMDGPUSampleHelper_Offset<
+        "_C"#ucmod, "_c"#lcmod, !listconcat(extra_addr, [AMDGPUArg<llvm_float_ty, "zcompare">])>;
+  }
+
+  multiclass AMDGPUSampleHelper_Clamp<string ucmod, string lcmod,
+                                      list<AMDGPUArg> extra_addr> {
+    defm NAME : AMDGPUSampleHelper_Compare<ucmod, lcmod, extra_addr>;
+    let LodOrClamp = "clamp" in
+    defm NAME : AMDGPUSampleHelper_Compare<ucmod#"_CL", lcmod#"_cl", extra_addr>;
+  }
+
+  defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
+    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
+    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
+        "_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
+    let LodOrClamp = "lod" in
+    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_L", "_l", []>;
+    defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_LZ", "_lz", []>;
+  }
+
+  let Gradients = 1 in {
+    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_D", "_d", []>;
+    defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_CD", "_cd", []>;
+  }
+}
+
+// Helper class to capture the profile of a dimension-aware image intrinsic.
+// This information is used to generate the intrinsic's type and to inform
+// codegen pattern matching.
+class AMDGPUDimProfile<string opmod,
+                       AMDGPUDimProps dim> {
+  AMDGPUDimProps Dim = dim;
+  string OpMod = opmod; // the corresponding instruction is named IMAGE_OpMod
+
+  // These are entended to be overwritten by subclasses
+  bit IsSample = 0;
+  bit IsAtomic = 0;
+  list<LLVMType> RetTypes = [];
+  list<AMDGPUArg> DataArgs = [];
+  list<AMDGPUArg> ExtraAddrArgs = [];
+  bit Gradients = 0;
+  string LodClampMip = "";
+
+  int NumRetAndDataAnyTypes =
+    !foldl(0, !listconcat(RetTypes, !foreach(arg, DataArgs, arg.Type)), a, b,
+           !add(a, b.isAny));
+
+  list<AMDGPUArg> AddrArgs =
+    arglistconcat<[ExtraAddrArgs,
+                   !if(Gradients, dim.GradientArgs, []),
+                   !listconcat(!if(IsSample, dim.CoordSliceArgs, dim.CoordSliceIntArgs),
+                               !if(!eq(LodClampMip, ""),
+                                   []<AMDGPUArg>,
+                                   [AMDGPUArg<LLVMMatchType<0>, LodClampMip>]))],
+                  NumRetAndDataAnyTypes>.ret;
+  list<LLVMType> AddrTypes = !foreach(arg, AddrArgs, arg.Type);
+  list<AMDGPUArg> AddrDefaultArgs =
+    !foreach(arg, AddrArgs,
+             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
+                           !if(IsSample, llvm_float_ty, llvm_i32_ty), arg.Type),
+                       arg.Name>);
+  list<AMDGPUArg> AddrA16Args =
+    !foreach(arg, AddrArgs,
+             AMDGPUArg<!if(!or(arg.Type.isAny, !isa<LLVMMatchType>(arg.Type)),
+                           !if(IsSample, llvm_half_ty, llvm_i16_ty), arg.Type),
+                       arg.Name>);
+}
+
+class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod, base.Dim> {
+  let IsSample = base.IsSample;
+  let IsAtomic = base.IsAtomic;
+  let RetTypes = base.RetTypes;
+  let DataArgs = base.DataArgs;
+  let ExtraAddrArgs = base.ExtraAddrArgs;
+  let Gradients = base.Gradients;
+  let LodClampMip = base.LodClampMip;
+}
+
+class AMDGPUDimSampleProfile<string opmod,
+                             AMDGPUDimProps dim,
+                             AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> {
+  let IsSample = 1;
+  let RetTypes = [llvm_anyfloat_ty];
+  let ExtraAddrArgs = sample.ExtraAddrArgs;
+  let Gradients = sample.Gradients;
+  let LodClampMip = sample.LodOrClamp;
+}
+
+class AMDGPUDimNoSampleProfile<string opmod,
+                               AMDGPUDimProps dim,
+                               list<LLVMType> retty,
+                               list<AMDGPUArg> dataargs,
+                               bit Mip = 0> : AMDGPUDimProfile<opmod, dim> {
+  let RetTypes = retty;
+  let DataArgs = dataargs;
+  let LodClampMip = !if(Mip, "mip", "");
+}
+
+class AMDGPUDimAtomicProfile<string opmod,
+                             AMDGPUDimProps dim,
+                             list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> {
+  let RetTypes = [llvm_anyint_ty];
+  let DataArgs = dataargs;
+  let IsAtomic = 1;
+}
+
+class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> {
+  let RetTypes = [llvm_anyfloat_ty];
+  let DataArgs = [];
+  let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">];
+  let LodClampMip = "mip";
+}
+
+// All dimension-aware intrinsics are derived from this class.
+class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
+                              list<IntrinsicProperty> props,
+                              list<SDNodeProperty> sdnodeprops> : Intrinsic<
+    P_.RetTypes,        // vdata(VGPR) -- for load/atomic-with-return
+    !listconcat(
+      !foreach(arg, P_.DataArgs, arg.Type),      // vdata(VGPR) -- for store/atomic
+      !if(P_.IsAtomic, [], [llvm_i32_ty]),       // dmask(imm)
+      P_.AddrTypes,                              // vaddr(VGPR)
+      [llvm_v8i32_ty],                           // rsrc(SGPR)
+      !if(P_.IsSample, [llvm_v4i32_ty,           // samp(SGPR)
+                        llvm_i1_ty], []),        // unorm(imm)
+      [llvm_i32_ty,                              // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
+       llvm_i32_ty]),                            // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+      props, "", sdnodeprops>,
+  AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),
+                           !if(P_.IsAtomic, 0, 1)), 1> {
+  AMDGPUDimProfile P = P_;
+
+  let TargetPrefix = "amdgcn";
+}
+
+defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
+
+  //////////////////////////////////////////////////////////////////////////
+  // Load and store intrinsics
+  //////////////////////////////////////////////////////////////////////////
+  multiclass AMDGPUImageDimIntrinsicsNoMsaa<string opmod,
+                                            list<LLVMType> retty,
+                                            list<AMDGPUArg> dataargs,
+                                            list<IntrinsicProperty> props,
+                                            list<SDNodeProperty> sdnodeprops,
+                                            bit Mip = 0> {
+    foreach dim = AMDGPUDims.NoMsaa in {
+      def !strconcat(NAME, "_", dim.Name)
+        : AMDGPUImageDimIntrinsic<
+            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
+            props, sdnodeprops>;
+    }
+  }
+
+  multiclass AMDGPUImageDimIntrinsicsAll<string opmod,
+                                         list<LLVMType> retty,
+                                         list<AMDGPUArg> dataargs,
+                                         list<IntrinsicProperty> props,
+                                         list<SDNodeProperty> sdnodeprops,
+                                         bit Mip = 0> {
+    foreach dim = AMDGPUDims.All in {
+      def !strconcat(NAME, "_", dim.Name)
+        : AMDGPUImageDimIntrinsic<
+            AMDGPUDimNoSampleProfile<opmod, dim, retty, dataargs, Mip>,
+            props, sdnodeprops>;
+    }
+  }
+
+  defm int_amdgcn_image_load : AMDGPUImageDimIntrinsicsAll<
+              "LOAD", [llvm_anyfloat_ty], [], [IntrReadMem], [SDNPMemOperand]>;
+  defm int_amdgcn_image_load_mip : AMDGPUImageDimIntrinsicsNoMsaa<
+              "LOAD_MIP", [llvm_anyfloat_ty], [], [IntrReadMem], [SDNPMemOperand], 1>;
+
+  defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
+              "STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
+              [IntrWriteMem], [SDNPMemOperand]>;
+  defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<
+              "STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
+              [IntrWriteMem], [SDNPMemOperand], 1>;
+
+  //////////////////////////////////////////////////////////////////////////
+  // sample and getlod intrinsics
+  //////////////////////////////////////////////////////////////////////////
+  multiclass AMDGPUImageDimSampleDims<string opmod,
+                                      AMDGPUSampleVariant sample,
+                                      bit NoMem = 0> {
+    foreach dim = AMDGPUDims.NoMsaa in {
+      def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
+          AMDGPUDimSampleProfile<opmod, dim, sample>,
+          !if(NoMem, [IntrNoMem], [IntrReadMem]),
+          !if(NoMem, [], [SDNPMemOperand])>;
+    }
+  }
+
+  foreach sample = AMDGPUSampleVariants in {
+    defm int_amdgcn_image_sample # sample.LowerCaseMod :
+        AMDGPUImageDimSampleDims<"SAMPLE" # sample.UpperCaseMod, sample>;
+  }
+
+  defm int_amdgcn_image_getlod : AMDGPUImageDimSampleDims<"GET_LOD", AMDGPUSample, 1>;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// getresinfo intrinsics (separate due to D16)
+//////////////////////////////////////////////////////////////////////////
+defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimGetResInfoIntrinsics = {
+  foreach dim = AMDGPUDims.All in {
+    def !strconcat("int_amdgcn_image_getresinfo_", dim.Name)
+      : AMDGPUImageDimIntrinsic<AMDGPUDimGetResInfoProfile<dim>, [IntrNoMem], []>;
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////
+// gather4 intrinsics
+//////////////////////////////////////////////////////////////////////////
+defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimGatherIntrinsics = {
+  foreach sample = AMDGPUSampleVariantsNoGradients in {
+    foreach dim = [AMDGPUDim2D, AMDGPUDimCube, AMDGPUDim2DArray] in {
+      def int_amdgcn_image_gather4 # sample.LowerCaseMod # _ # dim.Name:
+          AMDGPUImageDimIntrinsic<
+              AMDGPUDimSampleProfile<"GATHER4" # sample.UpperCaseMod, dim, sample>,
+              [IntrReadMem], [SDNPMemOperand]>;
+    }
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////
+// atomic intrinsics
+//////////////////////////////////////////////////////////////////////////
+defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
+  multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs> {
+    foreach dim = AMDGPUDims.All in {
+      def !strconcat(NAME, "_", dim.Name)
+        : AMDGPUImageDimIntrinsic<
+            AMDGPUDimAtomicProfile<opmod, dim, dataargs>,
+            [], [SDNPMemOperand]>;
+    }
+  }
+
+  multiclass AMDGPUImageDimAtomic<string opmod> {
+    defm "" : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">]>;
+  }
+
+  defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
+  defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
+  defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
+  defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
+  defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">;
+  defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">;
+  defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">;
+  defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
+  defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
+  defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
+
+  // TODO: INC/DEC are weird: they seem to have a vdata argument in hardware,
+  //       even though it clearly shouldn't be needed
+  defm int_amdgcn_image_atomic_inc : AMDGPUImageDimAtomic<"ATOMIC_INC">;
+  defm int_amdgcn_image_atomic_dec : AMDGPUImageDimAtomic<"ATOMIC_DEC">;
+
+  defm int_amdgcn_image_atomic_cmpswap :
+      AMDGPUImageDimAtomicX<"ATOMIC_CMPSWAP", [AMDGPUArg<LLVMMatchType<0>, "src">,
+                                               AMDGPUArg<LLVMMatchType<0>, "cmp">]>;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Buffer intrinsics
+//////////////////////////////////////////////////////////////////////////
+
+let TargetPrefix = "amdgcn" in {
+
 defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {
 
 class AMDGPUBufferLoad : Intrinsic <
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index cb2064c..32118df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -29,6 +29,9 @@ namespace llvm {
 namespace AMDGPU {
 #define GET_RSRCINTRINSIC_IMPL
 #include "AMDGPUGenSearchableTables.inc"
+
+#define GET_D16IMAGEDIMINTRINSIC_IMPL
+#include "AMDGPUGenSearchableTables.inc"
 }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 8f75b42..766ee3d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -63,6 +63,12 @@ struct RsrcIntrinsic {
 };
 const RsrcIntrinsic *lookupRsrcIntrinsicByIntr(unsigned Intr);
 
+struct D16ImageDimIntrinsic {
+  unsigned Intr;
+  unsigned D16HelperIntr;
+};
+const D16ImageDimIntrinsic *lookupD16ImageDimIntrinsicByIntr(unsigned Intr);
+
 } // end AMDGPU namespace
 } // End llvm namespace
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 0b9fe078..fce7499 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -23,7 +23,11 @@ class RsrcIntrinsic<AMDGPURsrcIntrinsic intr> : SearchableTable {
 }
 
 foreach intr = !listconcat(AMDGPUBufferIntrinsics,
-                           AMDGPUImageIntrinsics) in {
+                           AMDGPUImageIntrinsics,
+                           AMDGPUImageDimIntrinsics,
+                           AMDGPUImageDimGatherIntrinsics,
+                           AMDGPUImageDimGetResInfoIntrinsics,
+                           AMDGPUImageDimAtomicIntrinsics) in {
   def : RsrcIntrinsic<!cast<AMDGPURsrcIntrinsic>(intr)>;
 }
 
@@ -76,3 +80,20 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;
 def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
 def : SourceOfDivergence<int_amdgcn_ps_live>;
 def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
+
+foreach intr = AMDGPUImageDimAtomicIntrinsics in
+def : SourceOfDivergence<intr>;
+
+class D16ImageDimIntrinsic<AMDGPUImageDimIntrinsic intr> : SearchableTable {
+  let SearchableFields = ["Intr"];
+  let EnumNameField = ?;
+
+  Intrinsic Intr = intr;
+  code D16HelperIntr =
+      !cast<code>("AMDGPUIntrinsic::SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name);
+}
+
+foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
+                           AMDGPUImageDimGatherIntrinsics) in {
+  def : D16ImageDimIntrinsic<intr>;
+}
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 7a7b54e..2d2aaf7 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -464,6 +464,201 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o"
 //def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
 }
 
+/********** ============================== **********/
+/********** Dimension-aware image patterns **********/
+/********** ============================== **********/
+
+class getDwordsType<int dwords> {
+  int NumDwords = dwords;
+  string suffix = !if(!lt(dwords, 1), ?,
+                  !if(!eq(dwords, 1), "_V1",
+                  !if(!eq(dwords, 2), "_V2",
+                  !if(!le(dwords, 4), "_V4",
+                  !if(!le(dwords, 8), "_V8",
+                  !if(!le(dwords, 16), "_V16", ?))))));
+  ValueType VT = !if(!lt(dwords, 1), ?,
+                 !if(!eq(dwords, 1), f32,
+                 !if(!eq(dwords, 2), v2f32,
+                 !if(!le(dwords, 4), v4f32,
+                 !if(!le(dwords, 8), v8f32,
+                 !if(!le(dwords, 16), v16f32, ?))))));
+  RegisterClass VReg = !if(!lt(dwords, 1), ?,
+                       !if(!eq(dwords, 1), VGPR_32,
+                       !if(!eq(dwords, 2), VReg_64,
+                       !if(!le(dwords, 4), VReg_128,
+                       !if(!le(dwords, 8), VReg_256,
+                       !if(!le(dwords, 16), VReg_512, ?))))));
+}
+
+class makeRegSequence_Fold<int i, dag d> {
+  int idx = i;
+  dag lhs = d;
+}
+
+// Generate a dag node which returns a vector register of class RC into which
+// the source operands given by names have been inserted (assuming that each
+// name corresponds to an operand whose size is equal to a subregister).
+class makeRegSequence<ValueType vt, RegisterClass RC, list<string> names> {
+  dag ret =
+    !if(!eq(!size(names), 1),
+        !dag(COPY_TO_REGCLASS, [?, RC], [names[0], ?]),
+        !foldl(makeRegSequence_Fold<0, (vt (IMPLICIT_DEF))>, names, f, name,
+               makeRegSequence_Fold<
+                 !add(f.idx, 1),
+                 !con((INSERT_SUBREG f.lhs),
+                      !dag(INSERT_SUBREG, [?,    !cast<SubRegIndex>("sub"#f.idx)],
+                                          [name, ?]))>).lhs);
+}
+
+class ImageDimPattern<AMDGPUImageDimIntrinsic I,
+                      string dop, ValueType dty,
+                      string suffix = ""> : GCNPat<(undef), (undef)> {
+  list<AMDGPUArg> AddrArgs = I.P.AddrDefaultArgs;
+  getDwordsType AddrDwords = getDwordsType<!size(AddrArgs)>;
+
+  Instruction MI =
+    !cast<Instruction>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
+
+  // DAG fragment to match data arguments (vdata for store/atomic, dmask
+  // for non-atomic).
+  dag MatchDataDag =
+    !con(!dag(I, !foreach(arg, I.P.DataArgs, dty),
+                 !foreach(arg, I.P.DataArgs, arg.Name)),
+         !if(I.P.IsAtomic, (I), (I i32:$dmask)));
+
+  // DAG fragment to match vaddr arguments.
+  dag MatchAddrDag = !dag(I, !foreach(arg, AddrArgs, arg.Type.VT),
+                             !foreach(arg, AddrArgs, arg.Name));
+
+  // DAG fragment to match sampler resource and unorm arguments.
+  dag MatchSamplerDag = !if(I.P.IsSample, (I v4i32:$sampler, i1:$unorm), (I));
+
+  // DAG node that generates the MI vdata for store/atomic
+  getDwordsType DataDwords = getDwordsType<!size(I.P.DataArgs)>;
+  dag GenDataDag =
+    !if(I.P.IsAtomic, (MI makeRegSequence<DataDwords.VT, DataDwords.VReg,
+                                          !foreach(arg, I.P.DataArgs, arg.Name)>.ret),
+    !if(!size(I.P.DataArgs), (MI $vdata), (MI)));
+
+  // DAG node that generates the MI vaddr
+  dag GenAddrDag = makeRegSequence<AddrDwords.VT, AddrDwords.VReg,
+                                   !foreach(arg, AddrArgs, arg.Name)>.ret;
+  // DAG fragments that generate various inline flags
+  dag GenDmask =
+    !if(I.P.IsAtomic, (MI !add(!shl(1, DataDwords.NumDwords), -1)),
+                      (MI (as_i32imm $dmask)));
+  dag GenGLC =
+    !if(I.P.IsAtomic, (MI 1),
+                      (MI (bitextract_imm<0> $cachepolicy)));
+
+  dag MatchIntrinsic = !con(MatchDataDag,
+                            MatchAddrDag,
+                            (I v8i32:$rsrc),
+                            MatchSamplerDag,
+                            (I 0/*texfailctrl*/,
+                               i32:$cachepolicy));
+  let PatternToMatch =
+    !if(!size(I.RetTypes), (dty MatchIntrinsic), MatchIntrinsic);
+
+  bit IsCmpSwap = !and(I.P.IsAtomic, !eq(!size(I.P.DataArgs), 2));
+  dag ImageInstruction =
+    !con(GenDataDag,
+         (MI GenAddrDag),
+         (MI $rsrc),
+         !if(I.P.IsSample, (MI $sampler), (MI)),
+         GenDmask,
+         !if(I.P.IsSample, (MI (as_i1imm $unorm)), (MI 1)),
+         GenGLC,
+         (MI (bitextract_imm<1> $cachepolicy),
+             0, /* r128 */
+             0, /* tfe */
+             0 /*(as_i1imm $lwe)*/,
+             { I.P.Dim.DA }));
+  let ResultInstrs = [
+    !if(IsCmpSwap, (EXTRACT_SUBREG ImageInstruction, sub0), ImageInstruction)
+  ];
+}
+
+foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
+                           AMDGPUImageDimGetResInfoIntrinsics) in {
+  def intr#_pat_v1 : ImageDimPattern<intr, "_V1", f32>;
+  def intr#_pat_v2 : ImageDimPattern<intr, "_V2", v2f32>;
+  def intr#_pat_v4 : ImageDimPattern<intr, "_V4", v4f32>;
+}
+
+// v2f16 and v4f16 are used as data types to signal that D16 should be used.
+// However, they are not (always) legal types, and the SelectionDAG requires us
+// to legalize them before running any patterns. So we legalize them by
+// converting to an int type of equal size and using an internal 'd16helper'
+// intrinsic instead which signifies both the use of D16 and actually allows
+// this integer-based return type.
+multiclass ImageDimD16Helper<AMDGPUImageDimIntrinsic I,
+                             AMDGPUImageDimIntrinsic d16helper> {
+  let SubtargetPredicate = HasUnpackedD16VMem in {
+    def _unpacked_v1 : ImageDimPattern<I, "_V1", f16, "_D16_gfx80">;
+    def _unpacked_v2 : ImageDimPattern<d16helper, "_V2", v2i32, "_D16_gfx80">;
+    def _unpacked_v4 : ImageDimPattern<d16helper, "_V4", v4i32, "_D16_gfx80">;
+  } // End HasUnpackedD16VMem.
+
+  let SubtargetPredicate = HasPackedD16VMem in {
+    def _packed_v1 : ImageDimPattern<I, "_V1", f16, "_D16">;
+    // used on gfx810
+    def _packed_v2 : ImageDimPattern<d16helper, "_V1", i32, "_D16">;
+    // used on gfx900
+    def _packed_v2_gfx9 : ImageDimPattern<I, "_V1", v2f16, "_D16">;
+    def _packed_v4 : ImageDimPattern<d16helper, "_V2", v2i32, "_D16">;
+  } // End HasPackedD16VMem.
+}
+
+foreach intr = AMDGPUImageDimIntrinsics in {
+  def intr#_d16helper_profile : AMDGPUDimProfileCopy<intr.P> {
+    let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
+    let DataArgs = !foreach(arg, intr.P.DataArgs, AMDGPUArg<llvm_any_ty, arg.Name>);
+  }
+
+  let TargetPrefix = "SI", isTarget = 1 in
+  def int_SI_image_d16helper_ # intr.P.OpMod # intr.P.Dim.Name :
+      AMDGPUImageDimIntrinsic<!cast<AMDGPUDimProfile>(intr#"_d16helper_profile"),
+                              intr.IntrProperties, intr.Properties>;
+
+  defm intr#_d16 :
+      ImageDimD16Helper<
+          intr, !cast<AMDGPUImageDimIntrinsic>(
+                    "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name)>;
+}
+
+foreach intr = AMDGPUImageDimGatherIntrinsics in {
+  def intr#_pat3 : ImageDimPattern<intr, "_V4", v4f32>;
+
+  def intr#_d16helper_profile : AMDGPUDimProfileCopy<intr.P> {
+    let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
+    let DataArgs = !foreach(arg, intr.P.DataArgs, AMDGPUArg<llvm_any_ty, arg.Name>);
+  }
+
+  let TargetPrefix = "SI", isTarget = 1 in
+  def int_SI_image_d16helper_ # intr.P.OpMod # intr.P.Dim.Name :
+      AMDGPUImageDimIntrinsic<!cast<AMDGPUDimProfile>(intr#"_d16helper_profile"),
+                              intr.IntrProperties, intr.Properties>;
+
+  let SubtargetPredicate = HasUnpackedD16VMem in {
+    def intr#_unpacked_v4 :
+        ImageDimPattern<!cast<AMDGPUImageDimIntrinsic>(
+                            "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name),
+                        "_V4", v4i32, "_D16_gfx80">;
+  } // End HasUnpackedD16VMem.
+
+  let SubtargetPredicate = HasPackedD16VMem in {
+    def intr#_packed_v4 :
+        ImageDimPattern<!cast<AMDGPUImageDimIntrinsic>(
+                            "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name),
+                        "_V2", v2i32, "_D16">;
+  } // End HasPackedD16VMem.
+}
+
+foreach intr = AMDGPUImageDimAtomicIntrinsics in {
+  def intr#_pat1 : ImageDimPattern<intr, "_V1", i32>;
+}
+
 /********** ======================= **********/
 /********** Image sampling patterns **********/
 /********** ======================= **********/
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2a7549e..6f68f63 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3677,9 +3677,23 @@ SDValue SITargetLowering::lowerIntrinsicWChain_IllegalReturnType(SDValue Op,
     Chain = Res.getValue(1);
     return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked);
   }
-  default:
+  default: {
+    const AMDGPU::D16ImageDimIntrinsic *D16ImageDimIntr =
+        AMDGPU::lookupD16ImageDimIntrinsicByIntr(IID);
+    if (D16ImageDimIntr) {
+      SmallVector<SDValue, 20> Ops;
+      for (auto Value : Op.getNode()->op_values())
+        Ops.push_back(Value);
+      Ops[1] = DAG.getConstant(D16ImageDimIntr->D16HelperIntr, DL, MVT::i32);
+      Res = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTList, Ops,
+                                    M->getMemoryVT(), M->getMemOperand());
+      Chain = Res.getValue(1);
+      return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked);
+    }
+
     return SDValue();
   }
+  }
 }
 
 void SITargetLowering::ReplaceNodeResults(SDNode *N,
@@ -5151,9 +5165,32 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                    M->getMemoryVT(), M->getMemOperand());
   }
 
-  default:
+  default: {
+    const AMDGPU::D16ImageDimIntrinsic *D16ImageDimIntr =
+        AMDGPU::lookupD16ImageDimIntrinsicByIntr(IntrinsicID);
+    if (D16ImageDimIntr) {
+      SDValue VData = Op.getOperand(2);
+      EVT StoreVT = VData.getValueType();
+      if ((StoreVT == MVT::v2f16 && !isTypeLegal(StoreVT)) ||
+          StoreVT == MVT::v4f16) {
+        VData = handleD16VData(VData, DAG);
+
+        SmallVector<SDValue, 12> Ops;
+        for (auto Value : Op.getNode()->op_values())
+          Ops.push_back(Value);
+        Ops[1] = DAG.getConstant(D16ImageDimIntr->D16HelperIntr, DL, MVT::i32);
+        Ops[2] = VData;
+
+        MemSDNode *M = cast<MemSDNode>(Op);
+        return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Op->getVTList(),
+                                       Ops, M->getMemoryVT(),
+                                       M->getMemOperand());
+      }
+    }
+
     return Op;
   }
+  }
 }
 
 SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 777bb0a..8797253 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -572,6 +572,12 @@ return CurDAG->getTargetConstant(
   N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
 }]>;
 
+class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
+  uint64_t Imm = N->getZExtValue();
+  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
+  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
+}]>;
+
 def SIMM16bit : PatLeaf <(imm),
   [{return isInt<16>(N->getSExtValue());}]
 >;
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
index 4a3b60f..76d501b 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll
@@ -104,6 +104,14 @@ main_body:
   ret float %r
 }
 
+;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(
+define float @image_atomic_add_2d(<8 x i32> inreg %rsrc, i32 inreg %s, i32 inreg %t, i32 inreg %data) #0 {
+main_body:
+  %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  %r = bitcast i32 %orig to float
+  ret float %r
+}
+
 declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
 declare i32 @llvm.amdgcn.image.atomic.add.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
 declare i32 @llvm.amdgcn.image.atomic.sub.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
@@ -118,4 +126,6 @@ declare i32 @llvm.amdgcn.image.atomic.inc.i32(i32, i32, <8 x i32>, i1, i1, i1) #
 declare i32 @llvm.amdgcn.image.atomic.dec.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
 declare i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32, i32, i32, <8 x i32>,i1, i1, i1) #0
 
+declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
new file mode 100644
index 0000000..bc0356a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
@@ -0,0 +1,216 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}atomic_swap_1d:
+; GCN: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_add_1d:
+; GCN: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_sub_1d:
+; GCN: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_smin_1d:
+; GCN: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_umin_1d:
+; GCN: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_smax_1d:
+; GCN: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_umax_1d:
+; GCN: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_and_1d:
+; GCN: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_or_1d:
+; GCN: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_xor_1d:
+; GCN: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_inc_1d:
+; GCN: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_dec_1d:
+; GCN: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_cmpswap_1d:
+; GCN: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}}
+define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_add_2d:
+; GCN: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_add_3d:
+; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_add_cube:
+; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}}
+define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_add_1darray:
+; GCN: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da{{$}}
+define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_add_2darray:
+; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}}
+define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_add_2dmsaa:
+; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc{{$}}
+define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_add_2darraymsaa:
+; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}}
+define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+; GCN-LABEL: {{^}}atomic_add_1d_slc:
+; GCN: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc{{$}}
+define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+main_body:
+  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+  %out = bitcast i32 %v to float
+  ret float %out
+}
+
+declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+
+declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
new file mode 100644
index 0000000..f43a799
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
@@ -0,0 +1,115 @@
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
+
+; GCN-LABEL: {{^}}image_load_f16:
+; GCN: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
+define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret half %tex
+}
+
+; GCN-LABEL: {{^}}image_load_v2f16:
+; UNPACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
+; PACKED: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
+define amdgpu_ps float @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  %r = bitcast <2 x half> %tex to float
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}image_load_v4f16:
+; UNPACKED: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+define amdgpu_ps <2 x float> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  %r = bitcast <4 x half> %tex to <2 x float>
+  ret <2 x float> %r
+}
+
+; GCN-LABEL: {{^}}image_load_mip_v4f16:
+; UNPACKED: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm d16{{$}}
+; PACKED: image_load_mip v[0:1], v[0:3], s[0:7] dmask:0xf unorm d16{{$}}
+define amdgpu_ps <2 x float> @image_load_mip_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+main_body:
+  %tex = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  %r = bitcast <4 x half> %tex to <2 x float>
+  ret <2 x float> %r
+}
+
+; GCN-LABEL: {{^}}image_load_3d_v2f16:
+; UNPACKED: image_load v[0:1], v[0:3], s[0:7] dmask:0x3 unorm d16{{$}}
+; PACKED: image_load v0, v[0:3], s[0:7] dmask:0x3 unorm d16{{$}}
+define amdgpu_ps float @image_load_3d_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+main_body:
+  %tex = call <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32 3, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  %x = bitcast <2 x half> %tex to float
+  ret float %x
+}
+
+; GCN-LABEL: {{^}}image_store_f16
+; GCN: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
+define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
+main_body:
+  call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}image_store_v2f16
+; UNPACKED: v_lshrrev_b32_e32
+; UNPACKED: v_and_b32_e32
+; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
+; PACKED: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
+define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, float %in) {
+main_body:
+  %data = bitcast float %in to <2 x half>
+  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %data, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}image_store_v4f16
+; UNPACKED: v_lshrrev_b32_e32
+; UNPACKED: v_and_b32_e32
+; UNPACKED: v_lshrrev_b32_e32
+; UNPACKED: v_and_b32_e32
+; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
+main_body:
+  %data = bitcast <2 x float> %in to <4 x half>
+  call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %data, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}image_store_mip_1d_v4f16
+; UNPACKED: v_lshrrev_b32_e32
+; UNPACKED: v_and_b32_e32
+; UNPACKED: v_lshrrev_b32_e32
+; UNPACKED: v_and_b32_e32
+; UNPACKED: image_store_mip v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+; PACKED: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+define amdgpu_ps void @image_store_mip_1d_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %mip, <2 x float> %in) {
+main_body:
+  %data = bitcast <2 x float> %in to <4 x half>
+  call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half> %data, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+declare half @llvm.amdgcn.image.load.2d.f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.3d.v2f16.i32(<2 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
new file mode 100644
index 0000000..8234e2c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
@@ -0,0 +1,420 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}load_1d:
+; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_2d:
+; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_3d:
+; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_cube:
+; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_1darray:
+; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_2darray:
+; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_2dmsaa:
+; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_2darraymsaa:
+; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_1d:
+; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_2d:
+; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_3d:
+; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_cube:
+; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_1darray:
+; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_2darray:
+; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}store_1d:
+; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+main_body:
+  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_2d:
+; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+main_body:
+  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_3d:
+; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
+main_body:
+  call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_cube:
+; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
+main_body:
+  call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_1darray:
+; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
+main_body:
+  call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_2darray:
+; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
+main_body:
+  call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_2dmsaa:
+; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
+main_body:
+  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_2darraymsaa:
+; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+main_body:
+  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_1d:
+; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_2d:
+; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_3d:
+; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_cube:
+; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_1darray:
+; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_2darray:
+; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}getresinfo_1d:
+; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}getresinfo_2d:
+; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}getresinfo_3d:
+; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}getresinfo_cube:
+; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}getresinfo_1darray:
+; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}getresinfo_2darray:
+; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}getresinfo_2dmsaa:
+; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}getresinfo_2darraymsaa:
+; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
+define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_1d_V1:
+; GCN: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
+define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret float %v
+}
+
+; GCN-LABEL: {{^}}load_1d_V2:
+; GCN: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
+define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <2 x float> %v
+}
+
+; GCN-LABEL: {{^}}store_1d_V1:
+; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
+define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
+main_body:
+  call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_1d_V2:
+; GCN: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
+define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
+main_body:
+  call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}load_1d_glc:
+; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
+define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_1d_slc:
+; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
+define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_1d_glc_slc:
+; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
+define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}store_1d_glc:
+; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
+define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+main_body:
+  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_1d_slc:
+; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
+define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+main_body:
+  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_1d_glc_slc:
+; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
+define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+main_body:
+  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
+
+declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
+declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
new file mode 100644
index 0000000..eb633e7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
+
+; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16:
+; UNPACKED: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x4 d16{{$}}
+; PACKED: image_gather4_b v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x4 d16{{$}}
+define amdgpu_ps <2 x float> @image_gather4_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+main_body:
+  %tex = call <4 x half> @llvm.amdgcn.image.gather4.b.2d.v4f16.f32.f32(i32 4, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  %r = bitcast <4 x half> %tex to <2 x float>
+  ret <2 x float> %r
+}
+
+declare <4 x half> @llvm.amdgcn.image.gather4.b.2d.v4f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
new file mode 100644
index 0000000..f7fe050
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
@@ -0,0 +1,160 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+; GCN-LABEL: {{^}}gather4_2d:
+; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_cube:
+; GCN: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}}
+define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 1, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_2darray:
+; GCN: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}}
+define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 1, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_2d:
+; GCN: image_gather4_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_cl_2d:
+; GCN: image_gather4_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 1, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_cl_2d:
+; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_b_2d:
+; GCN: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_b_2d:
+; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_b_cl_2d:
+; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_b_cl_2d:
+; GCN: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_l_2d:
+; GCN: image_gather4_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_l_2d:
+; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_lz_2d:
+; GCN: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_lz_2d:
+; GCN: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_2d_dmask_2:
+; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2{{$}}
+define amdgpu_ps <4 x float> @gather4_2d_dmask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 2, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_2d_dmask_4:
+; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4{{$}}
+define amdgpu_ps <4 x float> @gather4_2d_dmask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 4, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_2d_dmask_8:
+; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8{{$}}
+define amdgpu_ps <4 x float> @gather4_2d_dmask_8(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 8, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
new file mode 100644
index 0000000..9619304
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
+
+; GCN-LABEL: {{^}}image_sample_2d_f16:
+; GCN: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16{{$}}
+define amdgpu_ps half @image_sample_2d_f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %tex = call half @llvm.amdgcn.image.sample.2d.f16.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret half %tex
+}
+
+; GCN-LABEL: {{^}}image_sample_c_d_1d_v2f16:
+; UNPACKED: image_sample_c_d v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x3 d16{{$}}
+; PACKED: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 d16{{$}}
+define amdgpu_ps float @image_sample_c_d_1d_v2f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+main_body:
+  %tex = call <2 x half> @llvm.amdgcn.image.sample.c.d.1d.v2f16.f32.f32(i32 3, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  %r = bitcast <2 x half> %tex to float
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}image_sample_b_2d_v4f16:
+; UNPACKED: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf d16{{$}}
+; PACKED: image_sample_b v[0:1], v[0:3], s[0:7], s[8:11] dmask:0xf d16{{$}}
+define amdgpu_ps <2 x float> @image_sample_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+main_body:
+  %tex = call <4 x half> @llvm.amdgcn.image.sample.b.2d.v4f16.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  %r = bitcast <4 x half> %tex to <2 x float>
+  ret <2 x float> %r
+}
+
+declare half @llvm.amdgcn.image.sample.2d.f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x half> @llvm.amdgcn.image.sample.c.d.1d.v2f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x half> @llvm.amdgcn.image.sample.b.2d.v4f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
new file mode 100644
index 0000000..b85b343
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -0,0 +1,459 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+; GCN-LABEL: {{^}}sample_1d:
+; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_2d:
+; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_3d:
+; GCN: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cube:
+; GCN: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}}
+define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_1darray:
+; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da{{$}}
+define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_2darray:
+; GCN: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}}
+define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_1d:
+; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_2d:
+; GCN: image_sample_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cl_1d:
+; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cl_2d:
+; GCN: image_sample_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cl_1d:
+; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cl_2d:
+; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_1d:
+; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_2d:
+; GCN: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_1d:
+; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_2d:
+; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_cl_1d:
+; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_cl_2d:
+; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_cl_1d:
+; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_cl_2d:
+; GCN: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_1d:
+; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_2d:
+; GCN: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_1d:
+; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_2d:
+; GCN: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_cl_1d:
+; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_cl_2d:
+; GCN: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_cl_1d:
+; GCN: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_cl_2d:
+; GCN: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_1d:
+; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_2d:
+; GCN: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_1d:
+; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_2d:
+; GCN: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_cl_1d:
+; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_cl_2d:
+; GCN: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_cl_1d:
+; GCN: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_cl_2d:
+; GCN: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_1d:
+; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_2d:
+; GCN: image_sample_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_1d:
+; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_2d:
+; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_lz_1d:
+; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_lz_2d:
+; GCN: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_lz_1d:
+; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_lz_2d:
+; GCN: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1:
+; GCN: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da{{$}}
+define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
+main_body:
+  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret float %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2:
+; GCN: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da{{$}}
+define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
+main_body:
+  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <2 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_1d_unorm:
+; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm{{$}}
+define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_1d_glc:
+; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc{{$}}
+define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_1d_slc:
+; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc{{$}}
+define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_1d_glc_slc:
+; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc{{$}}
+define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
+  ret <4 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
-- 
2.7.4