Avoid using lossy load / stores for memcpy / memset expansion. e.g.
f64 load / store on non-SSE2 x86 targets.
llvm-svn: 169944
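Background for the change: fldl converts the in-memory f64 to the x87 extended format as it loads, and loading a signaling NaN raises the invalid-operation exception, which, when masked, loads the quieted NaN instead. An fldl / fstpl round trip is therefore not a bit-exact copy, while memcpy / memset must reproduce bytes exactly. A minimal standalone sketch of the hazard (illustrative only; observing the changed bits requires a build where the compiler actually lowers the copy through fldl / fstpl, e.g. 32-bit x86 without SSE2):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      // Bit pattern of an f64 signaling NaN. With an x87 lowering
      // (fldl / fstpl) the copy below can quiet the SNaN, turning
      // 0x7ff0000000000001 into 0x7ff8000000000001; with SSE2 moves
      // it is bit-exact.
      uint64_t InBits = 0x7ff0000000000001ULL;
      double D;
      std::memcpy(&D, &InBits, sizeof D);
      double Copy = D;  // may be lowered to an fldl / fstpl pair
      uint64_t OutBits;
      std::memcpy(&OutBits, &Copy, sizeof OutBits);
      std::printf("in:  0x%016llx\nout: 0x%016llx\n",
                  (unsigned long long)InBits, (unsigned long long)OutBits);
      return 0;
    }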
    return MVT::Other;
  }
+  /// isLegalMemOpType - Returns true if it's legal to use load / store of the
+  /// specified type to expand memcpy / memset inline. This is mostly true
+  /// for legal types, with a few special-case exceptions. For example, on X86
+  /// targets without SSE2, f64 load / store is done with fldl / fstpl, which
+  /// also performs a type conversion.
+  virtual bool isLegalMemOpType(MVT VT) const {
+    return VT.isInteger();
+  }
+
  /// usesUnderscoreSetJmp - Determine if we should use _setjmp or setjmp
  /// to implement llvm.setjmp.
  bool usesUnderscoreSetJmp() const {
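The default above admits only integer types; a target whose FP loads / stores move bits exactly opts back in by overriding the hook, as the ARM and X86 hunks below do. A hypothetical out-of-tree override, just to show the shape (FooTargetLowering is illustrative, not part of this patch):

    // Hypothetical target: scalar FP moves copy bits exactly, so f32 / f64
    // are as safe as integers for inline memcpy / memset expansion.
    bool FooTargetLowering::isLegalMemOpType(MVT VT) const {
      return VT.isInteger() || VT == MVT::f32 || VT == MVT::f64;
    }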
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
  // For now, only use non-vector loads / stores for the left-over pieces.
-  EVT NewVT;
+  EVT NewVT = VT;
  unsigned NewVTSize;
+
+  bool Found = false;
  if (VT.isVector() || VT.isFloatingPoint()) {
    NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
-    while (!TLI.isOperationLegalOrCustom(ISD::STORE, NewVT)) {
-      if (NewVT == MVT::i64 &&
-          TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64)) {
-        // i64 is usually not legal on 32-bit targets, but f64 may be.
-        NewVT = MVT::f64;
-        break;
-      }
-      NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+    if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+        TLI.isLegalMemOpType(NewVT.getSimpleVT()))
+      Found = true;
+    else if (NewVT == MVT::i64 &&
+             TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+             TLI.isLegalMemOpType(MVT::f64)) {
+      // i64 is usually not legal on 32-bit targets, but f64 may be.
+      NewVT = MVT::f64;
+      Found = true;
    }
-    NewVTSize = NewVT.getSizeInBits() / 8;
-  } else {
-    // This can result in a type that is not legal on the target, e.g.
-    // 1 or 2 bytes on PPC.
-    NewVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
-    NewVTSize = VTSize >> 1;
  }
+  if (!Found) {
+    do {
+      NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+      if (NewVT == MVT::i8)
+        break;
+    } while (!TLI.isLegalMemOpType(NewVT.getSimpleVT()));
+  }
+  NewVTSize = NewVT.getSizeInBits() / 8;
+
  // If the new VT cannot cover all of the remaining bits, then consider
  // issuing a (or a pair of) unaligned and overlapping load / store.
  // FIXME: Only does this for 64-bit or more since we don't have proper
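To make the new fallback concrete: for a leftover piece on 32-bit x86 without SSE2, the i64 candidate fails the store-legality check, the f64 candidate now fails isLegalMemOpType, so Found stays false and the do-while shrinks the type until one passes. A standalone sketch of that walk (the enum and predicate are simplified stand-ins, not LLVM API):

    #include <cstdio>

    // Mirrors MVT's ordering for the types the fallback loop steps through.
    enum SimpleVT { i8, i16, i32, i64 };

    // Stand-in for TLI.isLegalMemOpType on 32-bit x86 without SSE2:
    // integer types up to i32 are usable, i64 (and f64) are not.
    static bool isLegalMemOpType(SimpleVT VT) { return VT <= i32; }

    int main() {
      // The i64 and f64 candidates were both rejected, so the fallback
      // walk shrinks the type until a legal one is found.
      SimpleVT NewVT = i64;
      do {
        NewVT = (SimpleVT)(NewVT - 1);
        if (NewVT == i8)
          break;  // never shrink below the smallest store type
      } while (!isLegalMemOpType(NewVT));
      std::printf("picked i%d\n", 8 << NewVT);  // prints "picked i32"
    }

The i8 check bounds the walk, so the loop always terminates with the smallest store type even if nothing larger is accepted.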
  return MVT::Other;
}
+bool ARMTargetLowering::isLegalMemOpType(MVT VT) const {
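+  // ARM's f64 and v2f64 loads / stores (VLDR, VLD1, etc.) are bit-exact
+  // copies, so they are safe for inline memcpy / memset expansion.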
+  return VT.isInteger() || VT == MVT::f64 || VT == MVT::v2f64;
+}
+
bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  if (Val.getOpcode() != ISD::LOAD)
    return false;
                                    bool MemcpyStrSrc,
                                    MachineFunction &MF) const;
+    /// isLegalMemOpType - Returns true if it's legal to use load / store of the
+    /// specified type to expand memcpy / memset inline. This is mostly true
+    /// for legal types, with a few special-case exceptions. For example, on X86
+    /// targets without SSE2, f64 load / store is done with fldl / fstpl, which
+    /// also performs a type conversion.
+    virtual bool isLegalMemOpType(MVT VT) const;
+
    using TargetLowering::isZExtFree;
    virtual bool isZExtFree(SDValue Val, EVT VT2) const;
  return MVT::i32;
}
+bool X86TargetLowering::isLegalMemOpType(MVT VT) const {
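+  // Without SSE, f32 / f64 load / store goes through the x87 stack, which
+  // is not a bit-exact copy (e.g. a masked invalid-operation exception on
+  // fldl quiets a signaling NaN), so FP types are only safe when the
+  // matching SSE level provides plain moves.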
+  if (VT == MVT::f32)
+    return X86ScalarSSEf32;
+  else if (VT == MVT::f64)
+    return X86ScalarSSEf64;
+  return VT.isInteger();
+}
+
bool
X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
  if (Fast)
                                    bool IsZeroVal, bool MemcpyStrSrc,
                                    MachineFunction &MF) const;
+    /// isLegalMemOpType - Returns true if it's legal to use load / store of the
+    /// specified type to expand memcpy / memset inline. This is mostly true
+    /// for legal types, with a few special-case exceptions. For example, on X86
+    /// targets without SSE2, f64 load / store is done with fldl / fstpl, which
+    /// also performs a type conversion.
+    virtual bool isLegalMemOpType(MVT VT) const;
+
    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
    /// unaligned memory accesses of the specified type. Returns whether it
    /// is "fast" by reference in the second argument.
; SSE2: movb $0, 24(%esp)
; SSE1: t1:
-; SSE1: fldl _.str+16
-; SSE1: fstpl 16(%esp)
; SSE1: movaps _.str, %xmm0
; SSE1: movaps %xmm0
; SSE1: movb $0, 24(%esp)
+; SSE1: movl $0, 20(%esp)
+; SSE1: movl $0, 16(%esp)
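; Without SSE2, f64 is now rejected by isLegalMemOpType, so the 8-byte tail
; is expanded with two plain i32 stores rather than an fldl / fstpl pair.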
; NOSSE: t1:
; NOSSE: movb $0