Similar to what we've done for HasMFence - this puts into place a pseudo-predicate for CLFLUSH instructions that separates it from HasSSE2 to make it easier to use CLFLUSH even when SSE/fpmath has been disabled - technically CLFLUSH has its own CPUID bit, so could be available on x86 cores entirely without SSE, but I don't think thats ever happened or likely to happen.
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasSHSTK : Predicate<"Subtarget->hasSHSTK()">;
+def HasCLFLUSH : Predicate<"Subtarget->hasCLFLUSH()">;
def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
def HasCLWB : Predicate<"Subtarget->hasCLWB()">;
def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">;
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- PS, Requires<[HasSSE2]>;
+ PS, Requires<[HasCLFLUSH]>;
}
let SchedRW = [WriteNop] in {
bool isXRaySupported() const override { return is64Bit(); }
+ /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for
+ /// no-sse2). There isn't any reason to disable it if the target processor
+ /// supports it.
+ bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); }
+
/// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
/// no-sse2). There isn't any reason to disable it if the target processor
/// supports it.
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64
+
+; It doesn't matter if an x86-64 target has specified "no-sse2"; we still can use clflush.
+
+define void @clflush(ptr %p) nounwind {
+; X86-LABEL: clflush:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: clflush (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: clflush:
+; X64: # %bb.0:
+; X64-NEXT: clflush (%rdi)
+; X64-NEXT: retq
+ tail call void @llvm.x86.sse2.clflush(ptr %p)
+ ret void
+}
+declare void @llvm.x86.sse2.clflush(ptr) nounwind