From ac272635124261c2c4c1f223a27e9dba542343e3 Mon Sep 17 00:00:00 2001
From: Daniel Sanders <daniel.sanders@imgtec.com>
Date: Fri, 23 May 2014 13:18:02 +0000
Subject: [PATCH] [mips][mips64r6] [ls][dw][lr] are not available in
 MIPS32r6/MIPS64r6

Summary:
Instead, the system is required to provide some means of handling unaligned
load/store without special instructions. Options include full hardware
support, full trap-and-emulate, and hybrids such as hardware support within
a cache line and trap-and-emulate for multi-line accesses.

MipsSETargetLowering::allowsUnalignedMemoryAccesses() has been configured to
assume that unaligned accesses are 'fast' on the basis that I expect few
hardware implementations will opt for pure-software handling of unaligned
accesses. The ones that do handle it purely in software can override this.

mips64load-store-left-right.ll has been merged into load-store-left-right.ll.

The stricter testing revealed a Bits!=Bytes bug in passByValArg(). This has
been fixed and the variables renamed to clarify the units they hold.

Reviewers: zoran.jovanovic, jkolek, vmedic

Reviewed By: vmedic

Differential Revision: http://reviews.llvm.org/D3872

llvm-svn: 209512
---
 llvm/lib/Target/Mips/Mips32r6InstrInfo.td          |   1 -
 llvm/lib/Target/Mips/Mips64InstrInfo.td            |   8 +-
 llvm/lib/Target/Mips/Mips64r6InstrInfo.td          |   1 -
 llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp          |   5 +-
 llvm/lib/Target/Mips/MipsISelLowering.cpp          |  61 +--
 llvm/lib/Target/Mips/MipsInstrInfo.td              |  15 +-
 llvm/lib/Target/Mips/MipsSEISelLowering.cpp        |  10 +
 llvm/lib/Target/Mips/MipsSubtarget.h               |   7 +-
 llvm/test/CodeGen/Mips/load-store-left-right.ll    | 434 ++++++++++++++++++++-
 .../CodeGen/Mips/mips64load-store-left-right.ll    |  75 ----
 llvm/test/CodeGen/Mips/unalignedload.ll            |  67 +++-
 .../MC/Mips/mips32r6/invalid-mips1-wrong-error.s   |  15 +
 .../MC/Mips/mips64r6/invalid-mips1-wrong-error.s   |  15 +
 .../MC/Mips/mips64r6/invalid-mips3-wrong-error.s   |  23 ++
 llvm/test/MC/Mips/mips64r6/invalid-mips3.s         |   8 +
 15 files changed, 602 insertions(+), 143 deletions(-)
 delete mode 100644 llvm/test/CodeGen/Mips/mips64load-store-left-right.ll
 create mode 100644 llvm/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
 create mode 100644 llvm/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
 create mode 100644 llvm/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
 create mode 100644 llvm/test/MC/Mips/mips64r6/invalid-mips3.s

diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index a1a3f6b..9755159 100644
--- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -35,7 +35,6 @@ include "Mips32r6InstrFormats.td"
 // Removed: jalx
 // Removed: ldxc1
 // Removed: luxc1
-// Removed: lwl, lwr, lwle, lwre, swl, swr, swle, swre
 // Removed: lwxc1
 // Removed: madd.[ds], nmadd.[ds], nmsub.[ds], sub.[ds]
 // Removed: mfhi, mflo, mthi, mtlo, madd, maddu, msub, msubu, mul
diff --git a/llvm/lib/Target/Mips/Mips64InstrInfo.td b/llvm/lib/Target/Mips/Mips64InstrInfo.td
index df49aa8..43103e6 100644
--- a/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -155,13 +155,13 @@ def SWR64 : StoreLeftRight<"swr", MipsSWR, GPR64Opnd, II_SWR>, LW_FM<0x2e>;
 }
 
 def LDL   : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd, II_LDL>, LW_FM<0x1a>,
-            ISA_MIPS3;
+            ISA_MIPS3_NOT_32R6_64R6;
 def LDR   : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd, II_LDR>, LW_FM<0x1b>,
-            ISA_MIPS3;
+            ISA_MIPS3_NOT_32R6_64R6;
 def SDL   : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd, II_SDL>, LW_FM<0x2c>,
-            ISA_MIPS3;
+            ISA_MIPS3_NOT_32R6_64R6;
 def SDR   : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>,
-            ISA_MIPS3;
+            ISA_MIPS3_NOT_32R6_64R6;
 
 /// Load-linked, Store-conditional
 def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>, ISA_MIPS3;
diff --git a/llvm/lib/Target/Mips/Mips64r6InstrInfo.td b/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
index 2e87a60..f971218 100644
--- a/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -17,7 +17,6 @@
 // Removed: daddi
 // Removed: ddiv, ddivu, dmult, dmultu
 // Removed: div, divu
-// Removed: ldl, ldr, ldle, ldre, sdl, sdr, sdle, sdre
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 4eb9d43..90cff63 100644
--- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -202,8 +202,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
 #ifndef NDEBUG
   case ISD::LOAD:
   case ISD::STORE:
-    assert(cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <=
-           cast<MemSDNode>(Node)->getAlignment() &&
+    assert((Subtarget.systemSupportsUnalignedAccess() ||
+            cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <=
+            cast<MemSDNode>(Node)->getAlignment()) &&
            "Unexpected unaligned loads/stores.");
     break;
 #endif
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index ff20988..bfe5ea1 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -1941,6 +1941,9 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   LoadSDNode *LD = cast<LoadSDNode>(Op);
   EVT MemVT = LD->getMemoryVT();
 
+  if (Subtarget->systemSupportsUnalignedAccess())
+    return Op;
+
   // Return if load is aligned or if MemVT is neither i32 nor i64.
   if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) ||
       ((MemVT != MVT::i32) && (MemVT != MVT::i64)))
@@ -2064,7 +2067,8 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   EVT MemVT = SD->getMemoryVT();
 
   // Lower unaligned integer stores.
-  if ((SD->getAlignment() < MemVT.getSizeInBits() / 8) &&
+  if (!Subtarget->systemSupportsUnalignedAccess() &&
+      (SD->getAlignment() < MemVT.getSizeInBits() / 8) &&
       ((MemVT == MVT::i32) || (MemVT == MVT::i64)))
     return lowerUnalignedIntStore(SD, DAG, Subtarget->isLittle());
 
@@ -3485,21 +3489,22 @@ passByValArg(SDValue Chain, SDLoc DL,
              MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
              const MipsCC &CC, const ByValArgInfo &ByVal,
              const ISD::ArgFlagsTy &Flags, bool isLittle) const {
-  unsigned ByValSize = Flags.getByValSize();
-  unsigned Offset = 0; // Offset in # of bytes from the beginning of struct.
-  unsigned RegSize = CC.regSize();
-  unsigned Alignment = std::min(Flags.getByValAlign(), RegSize);
-  EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSize * 8);
+  unsigned ByValSizeInBytes = Flags.getByValSize();
+  unsigned OffsetInBytes = 0; // From beginning of struct
+  unsigned RegSizeInBytes = CC.regSize();
+  unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes);
+  EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
 
   if (ByVal.NumRegs) {
     const MCPhysReg *ArgRegs = CC.intArgRegs();
-    bool LeftoverBytes = (ByVal.NumRegs * RegSize > ByValSize);
+    bool LeftoverBytes = (ByVal.NumRegs * RegSizeInBytes > ByValSizeInBytes);
     unsigned I = 0;
 
     // Copy words to registers.
-    for (; I < ByVal.NumRegs - LeftoverBytes; ++I, Offset += RegSize) {
+    for (; I < ByVal.NumRegs - LeftoverBytes;
+         ++I, OffsetInBytes += RegSizeInBytes) {
       SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
-                                    DAG.getConstant(Offset, PtrTy));
+                                    DAG.getConstant(OffsetInBytes, PtrTy));
       SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
                                     MachinePointerInfo(), false, false, false,
                                     Alignment);
@@ -3509,38 +3514,38 @@ passByValArg(SDValue Chain, SDLoc DL,
     }
 
     // Return if the struct has been fully copied.
-    if (ByValSize == Offset)
+    if (ByValSizeInBytes == OffsetInBytes)
       return;
 
     // Copy the remainder of the byval argument with sub-word loads and shifts.
     if (LeftoverBytes) {
-      assert((ByValSize > Offset) && (ByValSize < Offset + RegSize) &&
-             "Size of the remainder should be smaller than RegSize.");
+      assert((ByValSizeInBytes > OffsetInBytes) &&
+             (ByValSizeInBytes < OffsetInBytes + RegSizeInBytes) &&
+             "Size of the remainder should be smaller than RegSizeInBytes.");
       SDValue Val;
 
-      for (unsigned LoadSize = RegSize / 2, TotalSizeLoaded = 0;
-           Offset < ByValSize; LoadSize /= 2) {
-        unsigned RemSize = ByValSize - Offset;
+      for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0;
+           OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) {
+        unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes;
 
-        if (RemSize < LoadSize)
+        if (RemainingSizeInBytes < LoadSizeInBytes)
           continue;
 
         // Load subword.
         SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
-                                      DAG.getConstant(Offset, PtrTy));
-        SDValue LoadVal =
-          DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr,
-                         MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8),
-                         false, false, Alignment);
+                                      DAG.getConstant(OffsetInBytes, PtrTy));
+        SDValue LoadVal = DAG.getExtLoad(
+            ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(),
+            MVT::getIntegerVT(LoadSizeInBytes * 8), false, false, Alignment);
         MemOpChains.push_back(LoadVal.getValue(1));
 
         // Shift the loaded value.
         unsigned Shamt;
 
         if (isLittle)
-          Shamt = TotalSizeLoaded;
+          Shamt = TotalBytesLoaded * 8;
         else
-          Shamt = (RegSize - (TotalSizeLoaded + LoadSize)) * 8;
+          Shamt = (RegSizeInBytes - (TotalBytesLoaded + LoadSizeInBytes)) * 8;
 
         SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal,
                                     DAG.getConstant(Shamt, MVT::i32));
@@ -3550,9 +3555,9 @@ passByValArg(SDValue Chain, SDLoc DL,
         else
           Val = Shift;
 
-        Offset += LoadSize;
-        TotalSizeLoaded += LoadSize;
-        Alignment = std::min(Alignment, LoadSize);
+        OffsetInBytes += LoadSizeInBytes;
+        TotalBytesLoaded += LoadSizeInBytes;
+        Alignment = std::min(Alignment, LoadSizeInBytes);
       }
 
       unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I];
@@ -3562,9 +3567,9 @@ passByValArg(SDValue Chain, SDLoc DL,
   }
 
   // Copy remainder of byval arg to it with memcpy.
-  unsigned MemCpySize = ByValSize - Offset;
+  unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes;
   SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
-                            DAG.getConstant(Offset, PtrTy));
+                            DAG.getConstant(OffsetInBytes, PtrTy));
   SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
                             DAG.getIntPtrConstant(ByVal.Address));
   Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy),
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td
index dbcd674..b665019 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -225,6 +225,9 @@ class ISA_MIPS1_NOT_32R6_64R6 {
 }
 class ISA_MIPS2    { list<Predicate> InsnPredicates = [HasMips2]; }
 class ISA_MIPS3    { list<Predicate> InsnPredicates = [HasMips3]; }
+class ISA_MIPS3_NOT_32R6_64R6 {
+  list<Predicate> InsnPredicates = [HasMips3, NotMips32r6, NotMips64r6];
+}
 class ISA_MIPS32   { list<Predicate> InsnPredicates = [HasMips32]; }
 class ISA_MIPS32R2 { list<Predicate> InsnPredicates = [HasMips32r2]; }
 class ISA_MIPS64   { list<Predicate> InsnPredicates = [HasMips64]; }
@@ -1087,10 +1090,14 @@ def SW  : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>;
 /// load/store left/right
 let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
     AdditionalPredicates = [NotInMicroMips] in {
-def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, II_LWL>, LW_FM<0x22>;
-def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, II_LWR>, LW_FM<0x26>;
-def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, II_SWL>, LW_FM<0x2a>;
-def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>;
+def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, II_LWL>, LW_FM<0x22>,
+          ISA_MIPS1_NOT_32R6_64R6;
+def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, II_LWR>, LW_FM<0x26>,
+          ISA_MIPS1_NOT_32R6_64R6;
+def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, II_SWL>, LW_FM<0x2a>,
+          ISA_MIPS1_NOT_32R6_64R6;
+def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>,
+          ISA_MIPS1_NOT_32R6_64R6;
 }
 
 def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM;
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index eb9a819..969d730 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -254,6 +254,16 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
                                                     bool *Fast) const {
   MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
 
+  if (Subtarget->systemSupportsUnalignedAccess()) {
+    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
+    // implementation defined whether this is handled by hardware, software, or
+    // a hybrid of the two but it's expected that most implementations will
+    // handle the majority of cases in hardware.
+    if (Fast)
+      *Fast = true;
+    return true;
+  }
+
   switch (SVT) {
   case MVT::i64:
   case MVT::i32:
diff --git a/llvm/lib/Target/Mips/MipsSubtarget.h b/llvm/lib/Target/Mips/MipsSubtarget.h
index d57e678..373f481 100644
--- a/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -234,7 +234,12 @@ public:
   /// \brief Reset the subtarget for the Mips target.
   void resetSubtarget(MachineFunction *MF);
 
-
+  /// Does the system support unaligned memory access?
+  ///
+  /// MIPS32r6/MIPS64r6 requires full unaligned access support but does not
+  /// specify which component of the system provides it. Hardware, software, and
+  /// hybrid implementations are all valid.
+  bool systemSupportsUnalignedAccess() const { return hasMips32r6(); }
 };
 } // End llvm namespace
 
diff --git a/llvm/test/CodeGen/Mips/load-store-left-right.ll b/llvm/test/CodeGen/Mips/load-store-left-right.ll
index d0928ee..a3f5ebf 100644
--- a/llvm/test/CodeGen/Mips/load-store-left-right.ll
+++ b/llvm/test/CodeGen/Mips/load-store-left-right.ll
@@ -1,29 +1,439 @@
-; RUN: llc -march=mipsel < %s | FileCheck  -check-prefix=EL %s
-; RUN: llc -march=mips < %s | FileCheck  -check-prefix=EB %s
+; RUN: llc -march=mipsel   -mcpu=mips32              < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EL %s
+; RUN: llc -march=mips     -mcpu=mips32              < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EB %s
+; RUN: llc -march=mipsel   -mcpu=mips32r2            < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EL %s
+; RUN: llc -march=mips     -mcpu=mips32r2            < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EB %s
+; RUN: llc -march=mipsel   -mcpu=mips32r6            < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EL %s
+; RUN: llc -march=mips     -mcpu=mips32r6            < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EB %s
+; RUN: llc -march=mips64el -mcpu=mips4    -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64   -mcpu=mips4    -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64   -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64   -mcpu=mips64   -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64   -mcpu=mips64r2 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EL %s
+; RUN: llc -march=mips64   -mcpu=mips64r6 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EB %s
 
+%struct.SLL = type { i64 }
 %struct.SI = type { i32 }
+%struct.SUI = type { i32 }
 
+@sll = common global %struct.SLL zeroinitializer, align 1
 @si = common global %struct.SI zeroinitializer, align 1
+@sui = common global %struct.SUI zeroinitializer, align 1
 
-define i32 @foo_load_i() nounwind readonly {
+define i32 @load_SI() nounwind readonly {
 entry:
-; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: lwr $[[R0]], 0($[[R1]])
-; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: lwr $[[R0]], 3($[[R1]])
+; ALL-LABEL: load_SI:
+
+; MIPS32-EL:     lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL:     lwr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB:     lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB:     lwr $[[R0]], 3($[[R1]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6:      lw $2, 0($[[PTR]])
+
+; MIPS64-EL:     lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL:     lwr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     lwr $[[R0]], 3($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6:      lw $2, 0($[[PTR]])
 
   %0 = load i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
   ret i32 %0
 }
 
-define void @foo_store_i(i32 %a) nounwind {
+define void @store_SI(i32 %a) nounwind {
 entry:
-; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: swr $[[R0]], 0($[[R1]])
-; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: swr $[[R0]], 3($[[R1]])
+; ALL-LABEL: store_SI:
+
+; MIPS32-EL:     swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL:     swr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB:     swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB:     swr $[[R0]], 3($[[R1]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6:      sw $4, 0($[[PTR]])
+
+; MIPS64-EL:     swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL:     swr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     swr $[[R0]], 3($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6:      sw $4, 0($[[PTR]])
 
   store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
   ret void
 }
 
+define i64 @load_SLL() nounwind readonly {
+entry:
+; ALL-LABEL: load_SLL:
+
+; MIPS32-EL:     lwl $2, 3($[[R1:[0-9]+]])
+; MIPS32-EL:     lwr $2, 0($[[R1]])
+; MIPS32-EL:     lwl $3, 7($[[R1:[0-9]+]])
+; MIPS32-EL:     lwr $3, 4($[[R1]])
+
+; MIPS32-EB:     lwl $2, 0($[[R1:[0-9]+]])
+; MIPS32-EB:     lwr $2, 3($[[R1]])
+; MIPS32-EB:     lwl $3, 4($[[R1:[0-9]+]])
+; MIPS32-EB:     lwr $3, 7($[[R1]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(sll)(
+; MIPS32R6-DAG:  lw $2, 0($[[PTR]])
+; MIPS32R6-DAG:  lw $3, 4($[[PTR]])
+
+; MIPS64-EL:     ldl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
+; MIPS64-EL:     ldr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     ldl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     ldr $[[R0]], 7($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(sll)(
+; MIPS64R6:      ld $2, 0($[[PTR]])
+
+  %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+  ret i64 %0
+}
+
+define i64 @load_SI_sext_to_i64() nounwind readonly {
+entry:
+; ALL-LABEL: load_SI_sext_to_i64:
+
+; MIPS32-EL:     lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL:     lwr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB:     lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB:     lwr $[[R0]], 3($[[R1]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6-EL:   lw $2, 0($[[PTR]])
+; MIPS32R6-EL:   sra $3, $2, 31
+; MIPS32R6-EB:   lw $3, 0($[[PTR]])
+; MIPS32R6-EB:   sra $2, $3, 31
+
+; MIPS64-EL:     lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL:     lwr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     lwr $[[R0]], 3($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6:      lw $2, 0($[[PTR]])
+
+  %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+  %conv = sext i32 %0 to i64
+  ret i64 %conv
+}
+
+define i64 @load_UI() nounwind readonly {
+entry:
+; ALL-LABEL: load_UI:
+
+; MIPS32-EL-DAG: lwl $[[R2:2]], 3($[[R1:[0-9]+]])
+; MIPS32-EL-DAG: lwr $[[R2]],   0($[[R1]])
+; MIPS32-EL-DAG: addiu $3, $zero, 0
+
+; MIPS32-EB-DAG: lwl $[[R2:3]], 0($[[R1:[0-9]+]])
+; MIPS32-EB-DAG: lwr $[[R2]],   3($[[R1]])
+; MIPS32-EB-DAG: addiu $2, $zero, 0
+
+; MIPS32R6:        lw $[[PTR:[0-9]+]], %got(sui)(
+; MIPS32R6-EL-DAG: lw $2, 0($[[PTR]])
+; MIPS32R6-EL-DAG: addiu $3, $zero, 0
+; MIPS32R6-EB-DAG: lw $3, 0($[[PTR]])
+; MIPS32R6-EB-DAG: addiu $2, $zero, 0
+
+; MIPS64-EL-DAG: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL-DAG: lwr $[[R0]], 0($[[R1]])
+; MIPS64-EL-DAG: daddiu $[[R2:[0-9]+]], $zero, 1
+; MIPS64-EL-DAG: dsll   $[[R3:[0-9]+]], $[[R2]], 32
+; MIPS64-EL-DAG: daddiu $[[R4:[0-9]+]], $[[R3]], -1
+; MIPS64-EL-DAG: and    ${{[0-9]+}}, $[[R0]], $[[R4]]
+
+; MIPS64-EB:     lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     lwr $[[R0]], 3($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(sui)(
+; MIPS64R6:      lwu $2, 0($[[PTR]])
+
+  %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
+  %conv = zext i32 %0 to i64
+  ret i64 %conv
+}
+
+define void @store_SLL(i64 %a) nounwind {
+entry:
+; ALL-LABEL: store_SLL:
+
+; MIPS32-EL-DAG: swl $[[A1:4]], 3($[[R1:[0-9]+]])
+; MIPS32-EL-DAG: swr $[[A1]],   0($[[R1]])
+; MIPS32-EL-DAG: swl $[[A2:5]], 7($[[R1:[0-9]+]])
+; MIPS32-EL-DAG: swr $[[A2]],   4($[[R1]])
+
+; MIPS32-EB-DAG: swl $[[A1:4]], 0($[[R1:[0-9]+]])
+; MIPS32-EB-DAG: swr $[[A1]],   3($[[R1]])
+; MIPS32-EB-DAG: swl $[[A1:5]], 4($[[R1:[0-9]+]])
+; MIPS32-EB-DAG: swr $[[A1]],   7($[[R1]])
+
+; MIPS32R6-DAG:  lw $[[PTR:[0-9]+]], %got(sll)(
+; MIPS32R6-DAG:  sw $4, 0($[[PTR]])
+; MIPS32R6-DAG:  sw $5, 4($[[PTR]])
+
+; MIPS64-EL:     sdl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
+; MIPS64-EL:     sdr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     sdl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     sdr $[[R0]], 7($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(sll)(
+; MIPS64R6:      sd $4, 0($[[PTR]])
+
+  store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+  ret void
+}
+
+define void @store_SI_trunc_from_i64(i32 %a) nounwind {
+entry:
+; ALL-LABEL: store_SI_trunc_from_i64:
+
+; MIPS32-EL:     swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS32-EL:     swr $[[R0]], 0($[[R1]])
+
+; MIPS32-EB:     swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS32-EB:     swr $[[R0]], 3($[[R1]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(si)(
+; MIPS32R6:      sw $4, 0($[[PTR]])
+
+; MIPS64-EL:     swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
+; MIPS64-EL:     swr $[[R0]], 0($[[R1]])
+
+; MIPS64-EB:     swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
+; MIPS64-EB:     swr $[[R0]], 3($[[R1]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(si)(
+; MIPS64R6:      sw $4, 0($[[PTR]])
+
+  store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+  ret void
+}
+
+;
+; Structures are simply concatenations of the members. They are unaffected by
+; endianness.
+;
+
+%struct.S0 = type { i8, i8 }
+@struct_s0 = common global %struct.S0 zeroinitializer, align 1
+%struct.S1 = type { i16, i16 }
+@struct_s1 = common global %struct.S1 zeroinitializer, align 1
+%struct.S2 = type { i32, i32 }
+@struct_s2 = common global %struct.S2 zeroinitializer, align 1
+
+define void @copy_struct_S0() nounwind {
+entry:
+; ALL-LABEL: copy_struct_S0:
+
+; MIPS32-EL:     lw $[[PTR:[0-9]+]], %got(struct_s0)(
+; MIPS32-EB:     lw $[[PTR:[0-9]+]], %got(struct_s0)(
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(struct_s0)(
+; MIPS64-EL:     ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
+; MIPS64-EB:     ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(struct_s0)(
+
+; FIXME: We should be able to do better than this on MIPS32r6/MIPS64r6 since
+;        we have unaligned halfword load/store available
+; ALL-DAG:       lbu $[[R1:[0-9]+]], 0($[[PTR]])
+; ALL-DAG:       sb $[[R1]], 2($[[PTR]])
+; ALL-DAG:       lbu $[[R1:[0-9]+]], 1($[[PTR]])
+; ALL-DAG:       sb $[[R1]], 3($[[PTR]])
+
+  %0 = load %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 0), align 1
+  store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 1), align 1
+  ret void
+}
+
+define void @copy_struct_S1() nounwind {
+entry:
+; ALL-LABEL: copy_struct_S1:
+
+; MIPS32-EL:     lw $[[PTR:[0-9]+]], %got(struct_s1)(
+; MIPS32-EB:     lw $[[PTR:[0-9]+]], %got(struct_s1)(
+; MIPS32-DAG:    lbu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32-DAG:    sb $[[R1]], 4($[[PTR]])
+; MIPS32-DAG:    lbu $[[R1:[0-9]+]], 1($[[PTR]])
+; MIPS32-DAG:    sb $[[R1]], 5($[[PTR]])
+; MIPS32-DAG:    lbu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS32-DAG:    sb $[[R1]], 6($[[PTR]])
+; MIPS32-DAG:    lbu $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS32-DAG:    sb $[[R1]], 7($[[PTR]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(struct_s1)(
+; MIPS32R6-DAG:  lhu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32R6-DAG:  sh $[[R1]], 4($[[PTR]])
+; MIPS32R6-DAG:  lhu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS32R6-DAG:  sh $[[R1]], 6($[[PTR]])
+
+; MIPS64-EL:     ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
+; MIPS64-EB:     ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
+; MIPS64-DAG:    lbu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64-DAG:    sb $[[R1]], 4($[[PTR]])
+; MIPS64-DAG:    lbu $[[R1:[0-9]+]], 1($[[PTR]])
+; MIPS64-DAG:    sb $[[R1]], 5($[[PTR]])
+; MIPS64-DAG:    lbu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS64-DAG:    sb $[[R1]], 6($[[PTR]])
+; MIPS64-DAG:    lbu $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS64-DAG:    sb $[[R1]], 7($[[PTR]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(struct_s1)(
+; MIPS64R6-DAG:  lhu $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64R6-DAG:  sh $[[R1]], 4($[[PTR]])
+; MIPS64R6-DAG:  lhu $[[R1:[0-9]+]], 2($[[PTR]])
+; MIPS64R6-DAG:  sh $[[R1]], 6($[[PTR]])
+
+  %0 = load %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 0), align 1
+  store %struct.S1 %0, %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 1), align 1
+  ret void
+}
+
+define void @copy_struct_S2() nounwind {
+entry:
+; ALL-LABEL: copy_struct_S2:
+
+; MIPS32-EL:     lw $[[PTR:[0-9]+]], %got(struct_s2)(
+; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS32-EL-DAG: lwr $[[R1]],        0($[[PTR]])
+; MIPS32-EL-DAG: swl $[[R1]],       11($[[PTR]])
+; MIPS32-EL-DAG: swr $[[R1]],        8($[[PTR]])
+; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]])
+; MIPS32-EL-DAG: lwr $[[R1]],        4($[[PTR]])
+; MIPS32-EL-DAG: swl $[[R1]],       15($[[PTR]])
+; MIPS32-EL-DAG: swr $[[R1]],       12($[[PTR]])
+
+; MIPS32-EB:     lw $[[PTR:[0-9]+]], %got(struct_s2)(
+; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32-EB-DAG: lwr $[[R1]],        3($[[PTR]])
+; MIPS32-EB-DAG: swl $[[R1]],        8($[[PTR]])
+; MIPS32-EB-DAG: swr $[[R1]],       11($[[PTR]])
+; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS32-EB-DAG: lwr $[[R1]],        7($[[PTR]])
+; MIPS32-EB-DAG: swl $[[R1]],       12($[[PTR]])
+; MIPS32-EB-DAG: swr $[[R1]],       15($[[PTR]])
+
+; MIPS32R6:      lw $[[PTR:[0-9]+]], %got(struct_s2)(
+; MIPS32R6-DAG:  lw $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS32R6-DAG:  sw $[[R1]],        8($[[PTR]])
+; MIPS32R6-DAG:  lw $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS32R6-DAG:  sw $[[R1]],       12($[[PTR]])
+
+; MIPS64-EL:     ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
+; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS64-EL-DAG: lwr $[[R1]],        0($[[PTR]])
+; MIPS64-EL-DAG: swl $[[R1]],       11($[[PTR]])
+; MIPS64-EL-DAG: swr $[[R1]],        8($[[PTR]])
+; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]])
+; MIPS64-EL-DAG: lwr $[[R1]],        4($[[PTR]])
+; MIPS64-EL-DAG: swl $[[R1]],       15($[[PTR]])
+; MIPS64-EL-DAG: swr $[[R1]],       12($[[PTR]])
+
+; MIPS64-EB:     ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
+; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64-EB-DAG: lwr $[[R1]],        3($[[PTR]])
+; MIPS64-EB-DAG: swl $[[R1]],        8($[[PTR]])
+; MIPS64-EB-DAG: swr $[[R1]],       11($[[PTR]])
+; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS64-EB-DAG: lwr $[[R1]],        7($[[PTR]])
+; MIPS64-EB-DAG: swl $[[R1]],       12($[[PTR]])
+; MIPS64-EB-DAG: swr $[[R1]],       15($[[PTR]])
+
+; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(struct_s2)(
+; MIPS64R6-DAG:  lw $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64R6-DAG:  sw $[[R1]],        8($[[PTR]])
+; MIPS64R6-DAG:  lw $[[R1:[0-9]+]], 4($[[PTR]])
+; MIPS64R6-DAG:  sw $[[R1]],       12($[[PTR]])
+
+  %0 = load %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 0), align 1
+  store %struct.S2 %0, %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 1), align 1
+  ret void
+}
+
+;
+; Arrays are simply concatenations of the members. They are unaffected by
+; endianness.
+;
+
+@arr = common global [7 x i8] zeroinitializer, align 1
+
+define void @pass_array_byval() nounwind {
+entry:
+; ALL-LABEL: pass_array_byval:
+
+; MIPS32-EL:     lw $[[SPTR:[0-9]+]], %got(arr)(
+; MIPS32-EL-DAG: lwl $[[R1:4]], 3($[[PTR]])
+; MIPS32-EL-DAG: lwr $[[R1]],   0($[[PTR]])
+; MIPS32-EL-DAG: lbu $[[R2:[0-9]+]], 4($[[PTR]])
+; MIPS32-EL-DAG: lbu $[[R3:[0-9]+]], 5($[[PTR]])
+; MIPS32-EL-DAG: sll $[[T0:[0-9]+]], $[[R3]], 8
+; MIPS32-EL-DAG: or  $[[T1:[0-9]+]], $[[T0]], $[[R2]]
+; MIPS32-EL-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]])
+; MIPS32-EL-DAG: sll $[[T2:[0-9]+]], $[[R4]], 16
+; MIPS32-EL-DAG: or  $5, $[[T1]], $[[T2]]
+
+; MIPS32-EB:     lw $[[SPTR:[0-9]+]], %got(arr)(
+; MIPS32-EB-DAG: lwl $[[R1:4]], 0($[[PTR]])
+; MIPS32-EB-DAG: lwr $[[R1]],   3($[[PTR]])
+; MIPS32-EB-DAG: lbu $[[R2:[0-9]+]], 5($[[PTR]])
+; MIPS32-EB-DAG: lbu $[[R3:[0-9]+]], 4($[[PTR]])
+; MIPS32-EB-DAG: sll $[[T0:[0-9]+]], $[[R3]], 8
+; MIPS32-EB-DAG: or  $[[T1:[0-9]+]], $[[T0]], $[[R2]]
+; MIPS32-EB-DAG: sll $[[T1]], $[[T1]], 16
+; MIPS32-EB-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]])
+; MIPS32-EB-DAG: sll $[[T2:[0-9]+]], $[[R4]], 8
+; MIPS32-EB-DAG: or  $5, $[[T1]], $[[T2]]
+
+; MIPS32R6:        lw $[[SPTR:[0-9]+]], %got(arr)(
+; MIPS32R6-DAG:    lw $4, 0($[[PTR]])
+; MIPS32R6-EL-DAG: lhu $[[R2:[0-9]+]], 4($[[PTR]])
+; MIPS32R6-EL-DAG: lbu $[[R3:[0-9]+]], 6($[[PTR]])
+; MIPS32R6-EL-DAG: sll $[[T0:[0-9]+]], $[[R3]], 16
+; MIPS32R6-EL-DAG: or  $5, $[[R2]], $[[T0]]
+
+; MIPS32R6-EB-DAG: lhu $[[R2:[0-9]+]], 4($[[PTR]])
+; MIPS32R6-EB-DAG: lbu $[[R3:[0-9]+]], 6($[[PTR]])
+; MIPS32R6-EB-DAG: sll $[[T0:[0-9]+]], $[[R2]], 16
+; MIPS32R6-EB-DAG: or  $5, $[[T0]], $[[R3]]
+
+; MIPS64-EL:     ld $[[SPTR:[0-9]+]], %got_disp(arr)(
+; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]])
+; MIPS64-EL-DAG: lwr $[[R1]],   0($[[PTR]])
+
+; MIPS64-EB:     ld $[[SPTR:[0-9]+]], %got_disp(arr)(
+; MIPS64-EB-DAG: lwl  $[[R1:[0-9]+]], 0($[[PTR]])
+; MIPS64-EB-DAG: lwr  $[[R1]],   3($[[PTR]])
+; MIPS64-EB-DAG: dsll $[[R1]], $[[R1]], 32
+; MIPS64-EB-DAG: lbu  $[[R2:[0-9]+]], 5($[[PTR]])
+; MIPS64-EB-DAG: lbu  $[[R3:[0-9]+]], 4($[[PTR]])
+; MIPS64-EB-DAG: dsll $[[T0:[0-9]+]], $[[R3]], 8
+; MIPS64-EB-DAG: or   $[[T1:[0-9]+]], $[[T0]], $[[R2]]
+; MIPS64-EB-DAG: dsll $[[T1]], $[[T1]], 16
+; MIPS64-EB-DAG: or   $[[T3:[0-9]+]], $[[R1]], $[[T1]]
+; MIPS64-EB-DAG: lbu  $[[R4:[0-9]+]], 6($[[PTR]])
+; MIPS64-EB-DAG: dsll $[[T4:[0-9]+]], $[[R4]], 8
+; MIPS64-EB-DAG: or   $4, $[[T3]], $[[T4]]
+
+; MIPS64R6:      ld $[[SPTR:[0-9]+]], %got_disp(arr)(
+
+  tail call void @extern_func([7 x i8]* byval @arr) nounwind
+  ret void
+}
+
+declare void @extern_func([7 x i8]* byval)
diff --git a/llvm/test/CodeGen/Mips/mips64load-store-left-right.ll b/llvm/test/CodeGen/Mips/mips64load-store-left-right.ll
deleted file mode 100644
index c9ba467..0000000
--- a/llvm/test/CodeGen/Mips/mips64load-store-left-right.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck  -check-prefix=EL %s
-; RUN: llc -march=mips64 -mcpu=mips4 -mattr=n64 < %s | FileCheck  -check-prefix=EB %s
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck  -check-prefix=EL %s
-; RUN: llc -march=mips64 -mcpu=mips64 -mattr=n64 < %s | FileCheck  -check-prefix=EB %s
-
-%struct.SLL = type { i64 }
-%struct.SI = type { i32 }
-%struct.SUI = type { i32 }
-
-@sll = common global %struct.SLL zeroinitializer, align 1
-@si = common global %struct.SI zeroinitializer, align 1
-@sui = common global %struct.SUI zeroinitializer, align 1
-
-define i64 @foo_load_ll() nounwind readonly {
-entry:
-; EL: ldl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
-; EL: ldr $[[R0]], 0($[[R1]])
-; EB: ldl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: ldr $[[R0]], 7($[[R1]])
-
-  %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
-  ret i64 %0
-}
-
-define i64 @foo_load_i() nounwind readonly {
-entry:
-; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: lwr $[[R0]], 0($[[R1]])
-; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: lwr $[[R0]], 3($[[R1]])
-
-  %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
-  %conv = sext i32 %0 to i64
-  ret i64 %conv
-}
-
-define i64 @foo_load_ui() nounwind readonly {
-entry:
-; EL: lwl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: lwr $[[R0]], 0($[[R1]])
-; EL: daddiu $[[R2:[0-9]+]], $zero, 1
-; EL: dsll   $[[R3:[0-9]+]], $[[R2]], 32
-; EL: daddiu $[[R4:[0-9]+]], $[[R3]], -1
-; EL: and    ${{[0-9]+}}, $[[R0]], $[[R4]]
-; EB: lwl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: lwr $[[R0]], 3($[[R1]])
-
-
-  %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
-  %conv = zext i32 %0 to i64
-  ret i64 %conv
-}
-
-define void @foo_store_ll(i64 %a) nounwind {
-entry:
-; EL: sdl $[[R0:[0-9]+]], 7($[[R1:[0-9]+]])
-; EL: sdr $[[R0]], 0($[[R1]])
-; EB: sdl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: sdr $[[R0]], 7($[[R1]])
-
-  store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
-  ret void
-}
-
-define void @foo_store_i(i32 %a) nounwind {
-entry:
-; EL: swl $[[R0:[0-9]+]], 3($[[R1:[0-9]+]])
-; EL: swr $[[R0]], 0($[[R1]])
-; EB: swl $[[R0:[0-9]+]], 0($[[R1:[0-9]+]])
-; EB: swr $[[R0]], 3($[[R1]])
-
-  store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
-  ret void
-}
-
diff --git a/llvm/test/CodeGen/Mips/unalignedload.ll b/llvm/test/CodeGen/Mips/unalignedload.ll
index e86b1ba..2002b1c 100644
--- a/llvm/test/CodeGen/Mips/unalignedload.ll
+++ b/llvm/test/CodeGen/Mips/unalignedload.ll
@@ -1,5 +1,9 @@
-; RUN: llc  < %s -march=mipsel  | FileCheck %s -check-prefix=ALL -check-prefix=CHECK-EL
-; RUN: llc  < %s -march=mips    | FileCheck %s -check-prefix=ALL -check-prefix=CHECK-EB
+; RUN: llc  < %s -march=mipsel -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32-EL
+; RUN: llc  < %s -march=mips   -mcpu=mips32   | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32-EB
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32-EL
+; RUN: llc  < %s -march=mips   -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32-EB
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EL
+; RUN: llc  < %s -march=mips   -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=ALL-EB -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EB
 %struct.S2 = type { %struct.S1, %struct.S1 }
 %struct.S1 = type { i8, i8 }
 %struct.S4 = type { [7 x i8] }
@@ -11,17 +15,20 @@ define void @bar1() nounwind {
 entry:
 ; ALL-LABEL: bar1:
 
-; ALL-DAG:      lw $[[R0:[0-9]+]], %got(s2)(
+; ALL-DAG:       lw $[[R0:[0-9]+]], %got(s2)(
 
-; ALL-DAG:      lbu $[[PART1:[0-9]+]], 2($[[R0]])
-; ALL-DAG:      lbu $[[PART2:[0-9]+]], 3($[[R0]])
+; MIPS32-EL-DAG: lbu $[[PART1:[0-9]+]], 2($[[R0]])
+; MIPS32-EL-DAG: lbu $[[PART2:[0-9]+]], 3($[[R0]])
+; MIPS32-EL-DAG: sll $[[T0:[0-9]+]], $[[PART2]], 8
+; MIPS32-EL-DAG: or  $4, $[[T0]], $[[PART1]]
 
-; CHECK-EL-DAG: sll $[[T0:[0-9]+]], $[[PART2]], 8
-; CHECK-EL-DAG: or  $4, $[[T0]], $[[PART1]]
+; MIPS32-EB-DAG: lbu $[[PART1:[0-9]+]], 2($[[R0]])
+; MIPS32-EB-DAG: lbu $[[PART2:[0-9]+]], 3($[[R0]])
+; MIPS32-EB-DAG: sll $[[T0:[0-9]+]], $[[PART1]], 8
+; MIPS32-EB-DAG: or  $[[T1:[0-9]+]], $[[T0]], $[[PART2]]
+; MIPS32-EB-DAG: sll $4, $[[T1]], 16
 
-; CHECK-EB-DAG: sll $[[T0:[0-9]+]], $[[PART1]], 8
-; CHECK-EB-DAG: or  $[[T1:[0-9]+]], $[[T0]], $[[PART2]]
-; CHECK-EB-DAG: sll $4, $[[T1]], 16
+; MIPS32R6-DAG:  lhu $[[PART1:[0-9]+]], 2($[[R0]])
 
   tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
   ret void
@@ -31,13 +38,43 @@ define void @bar2() nounwind {
 entry:
 ; ALL-LABEL: bar2:
 
-; ALL-DAG:      lw $[[R2:[0-9]+]], %got(s4)(
+; ALL-DAG:       lw $[[R2:[0-9]+]], %got(s4)(
 
-; CHECK-EL-DAG: lwl $[[R1:4]], 3($[[R2]])
-; CHECK-EL-DAG: lwr $[[R1]], 0($[[R2]])
+; MIPS32-EL-DAG: lwl $[[R1:4]], 3($[[R2]])
+; MIPS32-EL-DAG: lwr $[[R1]], 0($[[R2]])
+; MIPS32-EL-DAG: lbu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32-EL-DAG: lbu $[[T1:[0-9]+]], 5($[[R2]])
+; MIPS32-EL-DAG: lbu $[[T2:[0-9]+]], 6($[[R2]])
+; MIPS32-EL-DAG: sll $[[T3:[0-9]+]], $[[T1]], 8
+; MIPS32-EL-DAG: or  $[[T4:[0-9]+]], $[[T3]], $[[T0]]
+; MIPS32-EL-DAG: sll $[[T5:[0-9]+]], $[[T2]], 16
+; MIPS32-EL-DAG: or  $5, $[[T4]], $[[T5]]
 
-; CHECK-EB-DAG: lwl $[[R1:4]], 0($[[R2]])
-; CHECK-EB-DAG: lwr $[[R1]], 3($[[R2]])
+; MIPS32-EB-DAG: lwl $[[R1:4]], 0($[[R2]])
+; MIPS32-EB-DAG: lwr $[[R1]], 3($[[R2]])
+; MIPS32-EB-DAG: lbu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32-EB-DAG: lbu $[[T1:[0-9]+]], 5($[[R2]])
+; MIPS32-EB-DAG: lbu $[[T2:[0-9]+]], 6($[[R2]])
+; MIPS32-EB-DAG: sll $[[T3:[0-9]+]], $[[T0]], 8
+; MIPS32-EB-DAG: or  $[[T4:[0-9]+]], $[[T3]], $[[T1]]
+; MIPS32-EB-DAG: sll $[[T5:[0-9]+]], $[[T4]], 16
+; MIPS32-EB-DAG: sll $[[T6:[0-9]+]], $[[T2]], 8
+; MIPS32-EB-DAG: or  $5, $[[T5]], $[[T6]]
+
+; FIXME: We should be able to do better than this using lhu
+; MIPS32R6-EL-DAG: lw $4, 0($[[R2]])
+; MIPS32R6-EL-DAG: lhu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32R6-EL-DAG: lbu $[[T1:[0-9]+]], 6($[[R2]])
+; MIPS32R6-EL-DAG: sll $[[T2:[0-9]+]], $[[T1]], 16
+; MIPS32R6-EL-DAG: or  $5, $[[T0]], $[[T2]]
+
+; FIXME: We should be able to do better than this using lhu
+; MIPS32R6-EB-DAG: lw $4, 0($[[R2]])
+; MIPS32R6-EB-DAG: lhu $[[T0:[0-9]+]], 4($[[R2]])
+; MIPS32R6-EB-DAG: lbu $[[T1:[0-9]+]], 6($[[R2]])
+; MIPS32R6-EB-DAG: sll $[[T2:[0-9]+]], $[[T0]], 16
+; MIPS32R6-EB-DAG: sll $[[T3:[0-9]+]], $[[T1]], 8
+; MIPS32R6-EB-DAG: or  $5, $[[T2]], $[[T3]]
 
   tail call void @foo4(%struct.S4* byval @s4) nounwind
   ret void
diff --git a/llvm/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s b/llvm/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
new file mode 100644
index 0000000..aee068a
--- /dev/null
+++ b/llvm/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
@@ -0,0 +1,15 @@
+# Instructions that are invalid on MIPS32r6 but are currently rejected with the wrong error message
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        lwl       $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        lwr       $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        swl       $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        swr       $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        lwle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        lwre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        swle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        swre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/llvm/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s b/llvm/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
new file mode 100644
index 0000000..f7949bb
--- /dev/null
+++ b/llvm/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
@@ -0,0 +1,15 @@
+# Instructions that are invalid on MIPS64r6 but are currently rejected with the wrong error message
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        lwl       $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        lwr       $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        swl       $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        swr       $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        lwle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        lwre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        swle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        swre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/llvm/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s b/llvm/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
new file mode 100644
index 0000000..7424f49
--- /dev/null
+++ b/llvm/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
@@ -0,0 +1,23 @@
+# Instructions that are invalid on MIPS64r6 but are currently rejected with the wrong error message
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        ldl       $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        ldr       $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        sdl       $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        sdr       $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        ldle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        ldre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        sdle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        sdre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        lwl       $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        lwr       $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        swl       $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        swr       $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        lwle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        lwre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        swle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        swre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
diff --git a/llvm/test/MC/Mips/mips64r6/invalid-mips3.s b/llvm/test/MC/Mips/mips64r6/invalid-mips3.s
new file mode 100644
index 0000000..1225005
--- /dev/null
+++ b/llvm/test/MC/Mips/mips64r6/invalid-mips3.s
@@ -0,0 +1,8 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
-- 
2.7.4