[x86] fix cost model inaccuracy for vector memory ops

author Sanjay Patel <spatel@rotateright.com>

Wed, 9 Mar 2016 22:23:33 +0000 (22:23 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Wed, 9 Mar 2016 22:23:33 +0000 (22:23 +0000)
author Sanjay Patel <spatel@rotateright.com>
Wed, 9 Mar 2016 22:23:33 +0000 (22:23 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Wed, 9 Mar 2016 22:23:33 +0000 (22:23 +0000)
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp

index efa7feb..ba977eb 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -983,10 +983,10 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    // Each load/store unit costs 1.
    int Cost = LT.first * 1;
  
-  // On Sandybridge 256bit load/stores are double pumped
-  // (but not on Haswell).
-  if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
-    Cost*=2;
+  // This isn't exactly right. We're using slow unaligned 32-byte accesses as a
+  // proxy for a double-pumped AVX memory interface such as on Sandybridge.
+  if (LT.second.getStoreSize() == 32 && ST->isUnalignedMem32Slow())
+    Cost *= 2;
  
    return Cost;
  }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/avx1.ll b/llvm/test/Transforms/LoopVectorize/X86/avx1.ll

index 45d2346..d384a81 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/X86/avx1.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -26,10 +26,10 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta
    ret i32 undef
  }
  
-;;; FIXME: If 32-byte accesses are fast, this should use a <4 x i64> load.
  
  ; CHECK-LABEL: @read_mod_i64(
-; CHECK: load <2 x i64>
+; SLOWMEM32: load <2 x i64>
+; FASTMEM32: load <4 x i64>
  ; CHECK: ret i32
  define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
    %1 = icmp sgt i32 %n, 0
author	Sanjay Patel <spatel@rotateright.com>
	Wed, 9 Mar 2016 22:23:33 +0000 (22:23 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Wed, 9 Mar 2016 22:23:33 +0000 (22:23 +0000)
llvm/lib/Target/X86/X86TargetTransformInfo.cpp		patch | blob | history
llvm/test/Transforms/LoopVectorize/X86/avx1.ll		patch | blob | history