[x86-64] allow mfence even with -mno-sse (PR23203)

author Sanjay Patel <spatel@rotateright.com>

Sat, 13 Feb 2016 17:26:29 +0000 (17:26 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Sat, 13 Feb 2016 17:26:29 +0000 (17:26 +0000)
author Sanjay Patel <spatel@rotateright.com>
Sat, 13 Feb 2016 17:26:29 +0000 (17:26 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Sat, 13 Feb 2016 17:26:29 +0000 (17:26 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 277fb81..a105038 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19717,13 +19717,6 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
    }
  }
  
-static bool hasMFENCE(const X86Subtarget &Subtarget) {
-  // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
-  // no-sse2). There isn't any reason to disable it if the target processor
-  // supports it.
-  return Subtarget.hasSSE2() || Subtarget.is64Bit();
-}
-
  LoadInst *
  X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
    unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -19763,7 +19756,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
      // the IR level, so we must wrap it in an intrinsic.
      return nullptr;
  
-  if (!hasMFENCE(Subtarget))
+  if (!Subtarget.hasMFence())
      // FIXME: it might make sense to use a locked operation here but on a
      // different cache-line to prevent cache-line bouncing. In practice it
      // is probably a small win, and x86 processors without mfence are rare
@@ -19794,7 +19787,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
    // The only fence that needs an instruction is a sequentially-consistent
    // cross-thread fence.
    if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
-    if (hasMFENCE(Subtarget))
+    if (Subtarget.hasMFence())
        return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
  
      SDValue Chain = Op.getOperand(0);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td

index 7178f1f..712711b 100644 (file)
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -845,6 +845,7 @@ def CallImmAddr  : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
  def FavorMemIndirectCall  : Predicate<"!Subtarget->callRegIndirect()">;
  def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
  def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
+def HasMFence    : Predicate<"Subtarget->hasMFence()">;
  
  //===----------------------------------------------------------------------===//
  // X86 Instruction Format Definitions.
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td

index 499c4c1..9c127ff 100644 (file)
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3762,6 +3762,8 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins),
  
  let SchedRW = [WriteFence] in {
  // Load, store, and memory fence
+// TODO: As with mfence, we may want to ease the availability of sfence/lfence
+// to include any 64-bit target.
  def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
                 "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
                 PS, Requires<[HasSSE1]>;
@@ -3770,7 +3772,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
                 TB, Requires<[HasSSE2]>;
  def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
                 "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
-               TB, Requires<[HasSSE2]>;
+               TB, Requires<[HasMFence]>;
  } // SchedRW
  
  def : Pat<(X86SFence), (SFENCE)>;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h

index 501770c..86f2540 100644 (file)
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -446,6 +446,11 @@ public:
    bool isSLM() const { return X86ProcFamily == IntelSLM; }
    bool useSoftFloat() const { return UseSoftFloat; }
  
+  /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
+  /// no-sse2). There isn't any reason to disable it if the target processor
+  /// supports it.
+  bool hasMFence() const { return hasSSE2() || is64Bit(); }
+
    const Triple &getTargetTriple() const { return TargetTriple; }
  
    bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
diff --git a/llvm/test/CodeGen/X86/mfence.ll b/llvm/test/CodeGen/X86/mfence.ll

index e1825f2..b67a5c3 100644 (file)
--- a/llvm/test/CodeGen/X86/mfence.ll
+++ b/llvm/test/CodeGen/X86/mfence.ll
@@ -1,11 +1,37 @@
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64
+
+; It doesn't matter if an x86-64 target has specified "no-sse2"; we still can use mfence.
  
  define void @test() {
-; CHECK-LABEL: test:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    mfence
-; CHECK-NEXT:    retl
+; X32-LABEL: test:
+; X32:       # BB#0:
+; X32-NEXT:    mfence
+; X32-NEXT:    retl
+;
+; X64-LABEL: test:
+; X64:       # BB#0:
+; X64-NEXT:    mfence
+; X64-NEXT:    retq
    fence seq_cst
    ret void
  }
  
+define i32 @fence(i32* %ptr) {
+; X32-LABEL: fence:
+; X32:       # BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    mfence
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: fence:
+; X64:       # BB#0:
+; X64-NEXT:    mfence
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    retq
+  %atomic = atomicrmw add i32* %ptr, i32 0 seq_cst
+  ret i32 %atomic
+}
+
author	Sanjay Patel <spatel@rotateright.com>
	Sat, 13 Feb 2016 17:26:29 +0000 (17:26 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Sat, 13 Feb 2016 17:26:29 +0000 (17:26 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86InstrInfo.td		patch \| blob \| history
llvm/lib/Target/X86/X86InstrSSE.td		patch \| blob \| history
llvm/lib/Target/X86/X86Subtarget.h		patch \| blob \| history
llvm/test/CodeGen/X86/mfence.ll		patch \| blob \| history