[X86] Add CMPXCHG8B feature flag. Set it for all CPUs except i386/i486 including...

author Craig Topper <craig.topper@intel.com>

Wed, 20 Mar 2019 23:35:49 +0000 (23:35 +0000)

committer Craig Topper <craig.topper@intel.com>

Wed, 20 Mar 2019 23:35:49 +0000 (23:35 +0000)
author Craig Topper <craig.topper@intel.com>
Wed, 20 Mar 2019 23:35:49 +0000 (23:35 +0000)
committer Craig Topper <craig.topper@intel.com>
Wed, 20 Mar 2019 23:35:49 +0000 (23:35 +0000)
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp

index 52e7080e7446f32cd07488fd45b331ffb7de4221..69362704687c9654602d8a1fc28977466c982730 100644 (file)
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -1264,6 +1264,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
  
    getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
  
+  Features["cx8"]    = (EDX >>  8) & 1;
    Features["cmov"]   = (EDX >> 15) & 1;
    Features["mmx"]    = (EDX >> 23) & 1;
    Features["fxsr"]   = (EDX >> 24) & 1;
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td

index 7deae9152f81940b9ace89b2dacc47c8390f38b9..fa8dd8a59f06a8c266732599037cbe5e855ae956 100644 (file)
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -39,6 +39,9 @@ def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
  def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
                                        "Enable conditional move instructions">;
  
+def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
+                                        "Support CMPXCHG8B instructions">;
+
  def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                                         "Support POPCNT instruction">;
  
@@ -471,6 +474,7 @@ include "X86SchedSkylakeServer.td"
  def ProcessorFeatures {
    // Nehalem
    list<SubtargetFeature> NHMInheritableFeatures = [FeatureX87,
+                                                   FeatureCMPXCHG8B,
                                                     FeatureCMOV,
                                                     FeatureMMX,
                                                     FeatureSSE42,
@@ -629,6 +633,7 @@ def ProcessorFeatures {
  
    // Atom
    list<SubtargetFeature> AtomInheritableFeatures = [FeatureX87,
+                                                    FeatureCMPXCHG8B,
                                                      FeatureCMOV,
                                                      FeatureMMX,
                                                      FeatureSSSE3,
@@ -707,6 +712,7 @@ def ProcessorFeatures {
  
    // Knights Landing
    list<SubtargetFeature> KNLFeatures = [FeatureX87,
+                                        FeatureCMPXCHG8B,
                                          FeatureCMOV,
                                          FeatureMMX,
                                          FeatureFXSR,
@@ -749,6 +755,7 @@ def ProcessorFeatures {
  
    // Bobcat
    list<SubtargetFeature> BtVer1InheritableFeatures = [FeatureX87,
+                                                      FeatureCMPXCHG8B,
                                                        FeatureCMOV,
                                                        FeatureMMX,
                                                        FeatureSSSE3,
@@ -785,6 +792,7 @@ def ProcessorFeatures {
  
    // Bulldozer
    list<SubtargetFeature> BdVer1InheritableFeatures = [FeatureX87,
+                                                      FeatureCMPXCHG8B,
                                                        FeatureCMOV,
                                                        FeatureXOP,
                                                        Feature64Bit,
@@ -883,23 +891,31 @@ def ProcessorFeatures {
  class Proc<string Name, list<SubtargetFeature> Features>
   : ProcessorModel<Name, GenericModel, Features>;
  
-def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16]>;
+// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
+// if i386/i486 is specifically requested.
+def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16,
+                               FeatureCMPXCHG8B]>;
  def : Proc<"i386",            [FeatureX87, FeatureSlowUAMem16]>;
  def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
-
-def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
-def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV,
-                          FeatureNOPL]>;
-
-def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                               FeatureCMOV, FeatureFXSR, FeatureNOPL]>;
+def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16,
+                               FeatureCMPXCHG8B]>;
+def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16,
+                               FeatureCMPXCHG8B]>;
+def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16,
+                               FeatureCMPXCHG8B, FeatureMMX]>;
+
+def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                    FeatureCMOV]>;
+def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                          FeatureCMOV, FeatureNOPL]>;
+
+def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                               FeatureMMX, FeatureCMOV, FeatureFXSR,
+                               FeatureNOPL]>;
  
  foreach P = ["pentium3", "pentium3m"] in {
-  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
-                 FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,FeatureMMX,
+                 FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
  }
  
  // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -913,13 +929,15 @@ foreach P = ["pentium3", "pentium3m"] in {
  // changes slightly.
  
  def : ProcessorModel<"pentium-m", GenericPostRAModel,
-                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                      FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+                     [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                      FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
+                      FeatureCMOV]>;
  
  foreach P = ["pentium4", "pentium4m"] in {
    def : ProcessorModel<P, GenericPostRAModel,
-                       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                        FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+                       [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                        FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
+                        FeatureCMOV]>;
  }
  
  // Intel Quark.
@@ -927,16 +945,19 @@ def : Proc<"lakemont",        []>;
  
  // Intel Core Duo.
  def : ProcessorModel<"yonah", SandyBridgeModel,
-                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
-                      FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+                     [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                      FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
+                      FeatureCMOV]>;
  
  // NetBurst.
  def : ProcessorModel<"prescott", GenericPostRAModel,
-                     [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
-                      FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+                     [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                      FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
+                      FeatureCMOV]>;
  def : ProcessorModel<"nocona", GenericPostRAModel, [
    FeatureX87,
    FeatureSlowUAMem16,
+  FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureMMX,
    FeatureSSE3,
@@ -950,6 +971,7 @@ def : ProcessorModel<"nocona", GenericPostRAModel, [
  def : ProcessorModel<"core2", SandyBridgeModel, [
    FeatureX87,
    FeatureSlowUAMem16,
+  FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureMMX,
    FeatureSSSE3,
@@ -963,6 +985,7 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
  def : ProcessorModel<"penryn", SandyBridgeModel, [
    FeatureX87,
    FeatureSlowUAMem16,
+  FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureMMX,
    FeatureSSE41,
@@ -1033,36 +1056,41 @@ def : ProcessorModel<"icelake-server", SkylakeServerModel,
  
  // AMD CPUs.
  
-def : Proc<"k6",              [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
-def : Proc<"k6-2",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"k6-3",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"k6",   [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                    FeatureMMX]>;
+def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                    Feature3DNow]>;
+def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                               Feature3DNow]>;
  
  foreach P = ["athlon", "athlon-tbird"] in {
-  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, Feature3DNowA,
-                 FeatureNOPL, FeatureSlowSHLD]>;
+  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
+                 Feature3DNowA, FeatureNOPL, FeatureSlowSHLD]>;
  }
  
  foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
-  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
-                 Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
+  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
+                 FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL,
+                 FeatureSlowSHLD]>;
  }
  
  foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
-  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
-                 FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
-                 FeatureCMOV]>;
+  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                 FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL,
+                 Feature64Bit, FeatureSlowSHLD, FeatureCMOV]>;
  }
  
  foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
-  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
-                 FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
-                 FeatureCMOV, Feature64Bit]>;
+  def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
+                 Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
+                 FeatureSlowSHLD, FeatureCMOV, Feature64Bit]>;
  }
  
  foreach P = ["amdfam10", "barcelona"] in {
-  def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
-                 FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
-                 FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>;
+  def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA,
+                 FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT,
+                 FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV,
+                 Feature64Bit]>;
  }
  
  // Bobcat
@@ -1082,13 +1110,15 @@ def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>;
  def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>;
  def : ProcessorModel<"znver2", Znver1Model, ProcessorFeatures.ZN2Features>;
  
-def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
+def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                               Feature3DNowA]>;
  
  def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
  def : Proc<"winchip2",        [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
  def : Proc<"c3",              [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
-                               FeatureSSE1, FeatureFXSR, FeatureCMOV]>;
+def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+                               FeatureMMX, FeatureSSE1, FeatureFXSR,
+                               FeatureCMOV]>;
  
  // We also provide a generic 64-bit specific x86 processor model which tries to
  // be good for modern chips without enabling instruction set encodings past the
@@ -1102,6 +1132,7 @@ def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
  // forming a common base for them.
  def : ProcessorModel<"x86-64", SandyBridgeModel, [
    FeatureX87,
+  FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureMMX,
    FeatureSSE2,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index d871f0ce3b7727cda428b94b39828715d0cf32ce..875fe4f4d0accbe87b23d98c77f2c68ea7138c4b 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -158,6 +158,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
      setUseUnderscoreLongJmp(true);
    }
  
+  // If we don't have cmpxchg8b(meaning this is a 386/486), limit atomic size to
+  // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
+  // FIXME: Should we be limiting the atomic size on other configs? Default is
+  // 1024.
+  if (!Subtarget.hasCmpxchg8b())
+    setMaxAtomicSizeInBitsSupported(32);
+
    // Set up the register classes.
    addRegisterClass(MVT::i8, &X86::GR8RegClass);
    addRegisterClass(MVT::i16, &X86::GR16RegClass);
@@ -25475,11 +25482,11 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
    unsigned OpWidth = MemType->getPrimitiveSizeInBits();
  
    if (OpWidth == 64)
-    return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
-  else if (OpWidth == 128)
+    return Subtarget.hasCmpxchg8b() && !Subtarget.is64Bit();
+  if (OpWidth == 128)
      return Subtarget.hasCmpxchg16b();
-  else
-    return false;
+
+  return false;
  }
  
  bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td

index 16afcf6330b0e7ace153232b5ac909adcc1e11d3..4c06b17654326f1b48dd75df7ed4bd52268dcc8a 100644 (file)
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -867,7 +867,7 @@ let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
  }
  
  let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
-    SchedRW = [WriteCMPXCHGRMW] in {
+    Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW] in {
  defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
  }
  
@@ -891,8 +891,9 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
  // the instruction and we are sure we will have a valid register to restore
  // the value of RBX.
  let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],
-    SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1,
-    Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in {
+    Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW],
+    isCodeGenOnly = 1, isPseudo = 1, Constraints = "$ebx_save = $dst",
+    usesCustomInserter = 1 in {
  def LCMPXCHG8B_SAVE_EBX :
      I<0, Pseudo, (outs GR32:$dst),
        (ins i64mem:$ptr, GR32:$ebx_input, GR32:$ebx_save),
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td

index 278dba50cf45e390b11dc0b3dd3b2ca55465292a..67ceceb66982f588c0c3ffb517dc8d72d12db280 100644 (file)
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -880,6 +880,7 @@ def HasWBNOINVD  : Predicate<"Subtarget->hasWBNOINVD()">;
  def HasRDPID     : Predicate<"Subtarget->hasRDPID()">;
  def HasWAITPKG   : Predicate<"Subtarget->hasWAITPKG()">;
  def HasINVPCID   : Predicate<"Subtarget->hasINVPCID()">;
+def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">;
  def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
  def HasPCONFIG   : Predicate<"Subtarget->hasPCONFIG()">;
  def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
@@ -2073,7 +2074,7 @@ def CMPXCHG64rm  : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
  
  let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
  def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
-                  "cmpxchg8b\t$dst", []>, TB;
+                  "cmpxchg8b\t$dst", []>, TB, Requires<[HasCmpxchg8b]>;
  
  let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
  // NOTE: In64BitMode check needed for the AssemblerPredicate.
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h

index ceb1e86769b51e77634a5a4f96a9d9b88f778ad2..8d330fa6f9a905f370727f271e55a1f224ab9c19 100644 (file)
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -88,6 +88,9 @@ protected:
    /// True if the processor supports X87 instructions.
    bool HasX87 = false;
  
+  /// True if the processor supports CMPXCHG8B.
+  bool HasCmpxchg8b = false;
+
    /// True if this processor has NOPL instruction
    /// (generally pentium pro+).
    bool HasNOPL = false;
@@ -546,6 +549,7 @@ public:
    void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
  
    bool hasX87() const { return HasX87; }
+  bool hasCmpxchg8b() const { return HasCmpxchg8b; }
    bool hasNOPL() const { return HasNOPL; }
    // SSE codegen depends on cmovs, and all SSE1+ processors support them.
    // All 64-bit processors support cmov.
diff --git a/llvm/test/CodeGen/X86/atomic64.ll b/llvm/test/CodeGen/X86/atomic64.ll

index ae39bd724db21d9437d3df4941ff2861960410fd..11bd6e0555877a6832bbda8aa15635991fa0bd60 100644 (file)
--- a/llvm/test/CodeGen/X86/atomic64.ll
+++ b/llvm/test/CodeGen/X86/atomic64.ll
@@ -1,5 +1,6 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
  ; RUN: llc < %s -O0 -mtriple=x86_64-- -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -mtriple=i386-- -mcpu=i486 -verify-machineinstrs | FileCheck %s --check-prefix I486
  
  @sc64 = external global i64
  @fsc64 = external global double
@@ -13,6 +14,52 @@ define void @atomic_fetch_add64() nounwind {
  ; X64-NEXT:    lock xaddq %rax, {{.*}}(%rip)
  ; X64-NEXT:    lock addq %rax, {{.*}}(%rip)
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_add64:
+; I486:       # %bb.0: # %entry
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    subl $56, %esp
+; I486-NEXT:    leal sc64, %eax
+; I486-NEXT:    movl %esp, %ecx
+; I486-NEXT:    movl $2, 12(%ecx)
+; I486-NEXT:    movl $0, 8(%ecx)
+; I486-NEXT:    movl $1, 4(%ecx)
+; I486-NEXT:    movl $sc64, (%ecx)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_add_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $0, 8(%esi)
+; I486-NEXT:    movl $3, 4(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_add_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $0, 8(%esi)
+; I486-NEXT:    movl $5, 4(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_add_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl %edx, 8(%esi)
+; I486-NEXT:    movl %eax, 4(%esi)
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_add_8
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    addl $56, %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    retl
  entry:
    %t1 = atomicrmw add  i64* @sc64, i64 1 acquire
    %t2 = atomicrmw add  i64* @sc64, i64 3 acquire
@@ -30,6 +77,52 @@ define void @atomic_fetch_sub64() nounwind {
  ; X64-NEXT:    lock xaddq %rax, {{.*}}(%rip)
  ; X64-NEXT:    lock subq %rax, {{.*}}(%rip)
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_sub64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    subl $56, %esp
+; I486-NEXT:    leal sc64, %eax
+; I486-NEXT:    movl %esp, %ecx
+; I486-NEXT:    movl $2, 12(%ecx)
+; I486-NEXT:    movl $0, 8(%ecx)
+; I486-NEXT:    movl $1, 4(%ecx)
+; I486-NEXT:    movl $sc64, (%ecx)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_sub_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $0, 8(%esi)
+; I486-NEXT:    movl $3, 4(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_sub_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $0, 8(%esi)
+; I486-NEXT:    movl $5, 4(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_sub_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl %edx, 8(%esi)
+; I486-NEXT:    movl %eax, 4(%esi)
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_sub_8
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    addl $56, %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    retl
    %t1 = atomicrmw sub  i64* @sc64, i64 1 acquire
    %t2 = atomicrmw sub  i64* @sc64, i64 3 acquire
    %t3 = atomicrmw sub  i64* @sc64, i64 5 acquire
@@ -61,6 +154,42 @@ define void @atomic_fetch_and64() nounwind {
  ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
  ; X64-NEXT:    lock andq %rax, {{.*}}(%rip)
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_and64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    subl $44, %esp
+; I486-NEXT:    leal sc64, %eax
+; I486-NEXT:    movl %esp, %ecx
+; I486-NEXT:    movl $2, 12(%ecx)
+; I486-NEXT:    movl $0, 8(%ecx)
+; I486-NEXT:    movl $3, 4(%ecx)
+; I486-NEXT:    movl $sc64, (%ecx)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_and_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $0, 8(%esi)
+; I486-NEXT:    movl $5, 4(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_and_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl %edx, 8(%esi)
+; I486-NEXT:    movl %eax, 4(%esi)
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_and_8
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    addl $44, %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    retl
    %t1 = atomicrmw and  i64* @sc64, i64 3 acquire
    %t2 = atomicrmw and  i64* @sc64, i64 5 acquire
    %t3 = atomicrmw and  i64* @sc64, i64 %t2 acquire
@@ -90,6 +219,42 @@ define void @atomic_fetch_or64() nounwind {
  ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
  ; X64-NEXT:    lock orq %rax, {{.*}}(%rip)
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_or64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    subl $44, %esp
+; I486-NEXT:    leal sc64, %eax
+; I486-NEXT:    movl %esp, %ecx
+; I486-NEXT:    movl $2, 12(%ecx)
+; I486-NEXT:    movl $0, 8(%ecx)
+; I486-NEXT:    movl $3, 4(%ecx)
+; I486-NEXT:    movl $sc64, (%ecx)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_or_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $0, 8(%esi)
+; I486-NEXT:    movl $5, 4(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_or_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl %edx, 8(%esi)
+; I486-NEXT:    movl %eax, 4(%esi)
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_or_8
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    addl $44, %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    retl
    %t1 = atomicrmw or   i64* @sc64, i64 3 acquire
    %t2 = atomicrmw or   i64* @sc64, i64 5 acquire
    %t3 = atomicrmw or   i64* @sc64, i64 %t2 acquire
@@ -119,6 +284,42 @@ define void @atomic_fetch_xor64() nounwind {
  ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
  ; X64-NEXT:    lock xorq %rax, {{.*}}(%rip)
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_xor64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    subl $44, %esp
+; I486-NEXT:    leal sc64, %eax
+; I486-NEXT:    movl %esp, %ecx
+; I486-NEXT:    movl $2, 12(%ecx)
+; I486-NEXT:    movl $0, 8(%ecx)
+; I486-NEXT:    movl $3, 4(%ecx)
+; I486-NEXT:    movl $sc64, (%ecx)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_xor_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $0, 8(%esi)
+; I486-NEXT:    movl $5, 4(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_xor_8
+; I486-NEXT:    leal sc64, %ecx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl %edx, 8(%esi)
+; I486-NEXT:    movl %eax, 4(%esi)
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_xor_8
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    addl $44, %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    retl
    %t1 = atomicrmw xor  i64* @sc64, i64 3 acquire
    %t2 = atomicrmw xor  i64* @sc64, i64 5 acquire
    %t3 = atomicrmw xor  i64* @sc64, i64 %t2 acquire
@@ -146,6 +347,26 @@ define void @atomic_fetch_nand64(i64 %x) nounwind {
  ; X64-NEXT:    jmp .LBB5_1
  ; X64-NEXT:  .LBB5_2: # %atomicrmw.end
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_nand64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    subl $28, %esp
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT:    leal sc64, %edx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl %eax, 8(%esi)
+; I486-NEXT:    movl %ecx, 4(%esi)
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_fetch_nand_8
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    addl $28, %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    retl
    %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
    ret void
  }
@@ -172,6 +393,81 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
  ; X64-NEXT:    jmp .LBB6_1
  ; X64-NEXT:  .LBB6_2: # %atomicrmw.end
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_max64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %ebp
+; I486-NEXT:    movl %esp, %ebp
+; I486-NEXT:    pushl %ebx
+; I486-NEXT:    pushl %edi
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    andl $-8, %esp
+; I486-NEXT:    subl $80, %esp
+; I486-NEXT:    movl 12(%ebp), %eax
+; I486-NEXT:    movl 8(%ebp), %ecx
+; I486-NEXT:    movl sc64+4, %edx
+; I486-NEXT:    movl sc64, %esi
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    jmp .LBB6_1
+; I486-NEXT:  .LBB6_1: # %atomicrmw.start
+; I486-NEXT:    # =>This Inner Loop Header: Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl %ecx, %edx
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT:    subl %esi, %edx
+; I486-NEXT:    movl %eax, %edi
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; I486-NEXT:    sbbl %ebx, %edi
+; I486-NEXT:    movl %ecx, %esi
+; I486-NEXT:    movl %eax, %ebx
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    jge .LBB6_4
+; I486-NEXT:  # %bb.3: # %atomicrmw.start
+; I486-NEXT:    # in Loop: Header=BB6_1 Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:  .LBB6_4: # %atomicrmw.start
+; I486-NEXT:    # in Loop: Header=BB6_1 Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl %esp, %edi
+; I486-NEXT:    movl %eax, 12(%edi)
+; I486-NEXT:    movl %ecx, 8(%edi)
+; I486-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; I486-NEXT:    movl %eax, 4(%edi)
+; I486-NEXT:    movl $2, 20(%edi)
+; I486-NEXT:    movl $2, 16(%edi)
+; I486-NEXT:    movl $sc64, (%edi)
+; I486-NEXT:    calll __atomic_compare_exchange_8
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT:    testb %al, %al
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    je .LBB6_1
+; I486-NEXT:    jmp .LBB6_2
+; I486-NEXT:  .LBB6_2: # %atomicrmw.end
+; I486-NEXT:    leal -12(%ebp), %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    popl %edi
+; I486-NEXT:    popl %ebx
+; I486-NEXT:    popl %ebp
+; I486-NEXT:    retl
    %t1 = atomicrmw max  i64* @sc64, i64 %x acquire
  
    ret void
@@ -199,6 +495,79 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
  ; X64-NEXT:    jmp .LBB7_1
  ; X64-NEXT:  .LBB7_2: # %atomicrmw.end
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_min64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %ebp
+; I486-NEXT:    movl %esp, %ebp
+; I486-NEXT:    pushl %ebx
+; I486-NEXT:    pushl %edi
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    andl $-8, %esp
+; I486-NEXT:    subl $80, %esp
+; I486-NEXT:    movl 12(%ebp), %eax
+; I486-NEXT:    movl 8(%ebp), %ecx
+; I486-NEXT:    movl sc64+4, %edx
+; I486-NEXT:    movl sc64, %esi
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    jmp .LBB7_1
+; I486-NEXT:  .LBB7_1: # %atomicrmw.start
+; I486-NEXT:    # =>This Inner Loop Header: Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT:    subl %ecx, %edx
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT:    sbbl %eax, %esi
+; I486-NEXT:    movl %ecx, %edi
+; I486-NEXT:    movl %eax, %ebx
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    jge .LBB7_4
+; I486-NEXT:  # %bb.3: # %atomicrmw.start
+; I486-NEXT:    # in Loop: Header=BB7_1 Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:  .LBB7_4: # %atomicrmw.start
+; I486-NEXT:    # in Loop: Header=BB7_1 Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl %esp, %edi
+; I486-NEXT:    movl %eax, 12(%edi)
+; I486-NEXT:    movl %ecx, 8(%edi)
+; I486-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; I486-NEXT:    movl %eax, 4(%edi)
+; I486-NEXT:    movl $2, 20(%edi)
+; I486-NEXT:    movl $2, 16(%edi)
+; I486-NEXT:    movl $sc64, (%edi)
+; I486-NEXT:    calll __atomic_compare_exchange_8
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT:    testb %al, %al
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    je .LBB7_1
+; I486-NEXT:    jmp .LBB7_2
+; I486-NEXT:  .LBB7_2: # %atomicrmw.end
+; I486-NEXT:    leal -12(%ebp), %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    popl %edi
+; I486-NEXT:    popl %ebx
+; I486-NEXT:    popl %ebp
+; I486-NEXT:    retl
    %t1 = atomicrmw min  i64* @sc64, i64 %x acquire
  
    ret void
@@ -226,6 +595,79 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
  ; X64-NEXT:    jmp .LBB8_1
  ; X64-NEXT:  .LBB8_2: # %atomicrmw.end
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_umax64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %ebp
+; I486-NEXT:    movl %esp, %ebp
+; I486-NEXT:    pushl %ebx
+; I486-NEXT:    pushl %edi
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    andl $-8, %esp
+; I486-NEXT:    subl $80, %esp
+; I486-NEXT:    movl 12(%ebp), %eax
+; I486-NEXT:    movl 8(%ebp), %ecx
+; I486-NEXT:    movl sc64+4, %edx
+; I486-NEXT:    movl sc64, %esi
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    jmp .LBB8_1
+; I486-NEXT:  .LBB8_1: # %atomicrmw.start
+; I486-NEXT:    # =>This Inner Loop Header: Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT:    subl %ecx, %edx
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT:    sbbl %eax, %esi
+; I486-NEXT:    movl %ecx, %edi
+; I486-NEXT:    movl %eax, %ebx
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    jb .LBB8_4
+; I486-NEXT:  # %bb.3: # %atomicrmw.start
+; I486-NEXT:    # in Loop: Header=BB8_1 Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:  .LBB8_4: # %atomicrmw.start
+; I486-NEXT:    # in Loop: Header=BB8_1 Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl %esp, %edi
+; I486-NEXT:    movl %eax, 12(%edi)
+; I486-NEXT:    movl %ecx, 8(%edi)
+; I486-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; I486-NEXT:    movl %eax, 4(%edi)
+; I486-NEXT:    movl $2, 20(%edi)
+; I486-NEXT:    movl $2, 16(%edi)
+; I486-NEXT:    movl $sc64, (%edi)
+; I486-NEXT:    calll __atomic_compare_exchange_8
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT:    testb %al, %al
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    je .LBB8_1
+; I486-NEXT:    jmp .LBB8_2
+; I486-NEXT:  .LBB8_2: # %atomicrmw.end
+; I486-NEXT:    leal -12(%ebp), %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    popl %edi
+; I486-NEXT:    popl %ebx
+; I486-NEXT:    popl %ebp
+; I486-NEXT:    retl
    %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
  
    ret void
@@ -253,6 +695,79 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
  ; X64-NEXT:    jmp .LBB9_1
  ; X64-NEXT:  .LBB9_2: # %atomicrmw.end
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_umin64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %ebp
+; I486-NEXT:    movl %esp, %ebp
+; I486-NEXT:    pushl %ebx
+; I486-NEXT:    pushl %edi
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    andl $-8, %esp
+; I486-NEXT:    subl $80, %esp
+; I486-NEXT:    movl 12(%ebp), %eax
+; I486-NEXT:    movl 8(%ebp), %ecx
+; I486-NEXT:    movl sc64+4, %edx
+; I486-NEXT:    movl sc64, %esi
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    jmp .LBB9_1
+; I486-NEXT:  .LBB9_1: # %atomicrmw.start
+; I486-NEXT:    # =>This Inner Loop Header: Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT:    subl %ecx, %edx
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT:    sbbl %eax, %esi
+; I486-NEXT:    movl %ecx, %edi
+; I486-NEXT:    movl %eax, %ebx
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    jae .LBB9_4
+; I486-NEXT:  # %bb.3: # %atomicrmw.start
+; I486-NEXT:    # in Loop: Header=BB9_1 Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:  .LBB9_4: # %atomicrmw.start
+; I486-NEXT:    # in Loop: Header=BB9_1 Depth=1
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; I486-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; I486-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl %esp, %edi
+; I486-NEXT:    movl %eax, 12(%edi)
+; I486-NEXT:    movl %ecx, 8(%edi)
+; I486-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; I486-NEXT:    movl %eax, 4(%edi)
+; I486-NEXT:    movl $2, 20(%edi)
+; I486-NEXT:    movl $2, 16(%edi)
+; I486-NEXT:    movl $sc64, (%edi)
+; I486-NEXT:    calll __atomic_compare_exchange_8
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT:    testb %al, %al
+; I486-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    je .LBB9_1
+; I486-NEXT:    jmp .LBB9_2
+; I486-NEXT:  .LBB9_2: # %atomicrmw.end
+; I486-NEXT:    leal -12(%ebp), %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    popl %edi
+; I486-NEXT:    popl %ebx
+; I486-NEXT:    popl %ebp
+; I486-NEXT:    retl
    %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
  
    ret void
@@ -267,6 +782,30 @@ define void @atomic_fetch_cmpxchg64() nounwind {
  ; X64-NEXT:    lock cmpxchgq %rcx, {{.*}}(%rip)
  ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_cmpxchg64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %ebp
+; I486-NEXT:    movl %esp, %ebp
+; I486-NEXT:    andl $-8, %esp
+; I486-NEXT:    subl $40, %esp
+; I486-NEXT:    leal sc64, %eax
+; I486-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; I486-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl %esp, %edx
+; I486-NEXT:    movl %ecx, 4(%edx)
+; I486-NEXT:    movl $2, 20(%edx)
+; I486-NEXT:    movl $2, 16(%edx)
+; I486-NEXT:    movl $0, 12(%edx)
+; I486-NEXT:    movl $1, 8(%edx)
+; I486-NEXT:    movl $sc64, (%edx)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_compare_exchange_8
+; I486-NEXT:    movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; I486-NEXT:    movl %ebp, %esp
+; I486-NEXT:    popl %ebp
+; I486-NEXT:    retl
    %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
    ret void
  }
@@ -276,6 +815,24 @@ define void @atomic_fetch_store64(i64 %x) nounwind {
  ; X64:       # %bb.0:
  ; X64-NEXT:    movq %rdi, {{.*}}(%rip)
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_store64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    subl $20, %esp
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT:    leal sc64, %edx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl %eax, 8(%esi)
+; I486-NEXT:    movl %ecx, 4(%esi)
+; I486-NEXT:    movl $3, 12(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_store_8
+; I486-NEXT:    addl $20, %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    retl
    store atomic i64 %x, i64* @sc64 release, align 8
    ret void
  }
@@ -286,6 +843,26 @@ define void @atomic_fetch_swap64(i64 %x) nounwind {
  ; X64-NEXT:    xchgq %rdi, {{.*}}(%rip)
  ; X64-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_swap64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    subl $28, %esp
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT:    leal sc64, %edx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl %eax, 8(%esi)
+; I486-NEXT:    movl %ecx, 4(%esi)
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $sc64, (%esi)
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_exchange_8
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    addl $28, %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    retl
    %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
    ret void
  }
@@ -297,6 +874,32 @@ define void @atomic_fetch_swapf64(double %x) nounwind {
  ; X64-NEXT:    xchgq %rax, {{.*}}(%rip)
  ; X64-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
  ; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_swapf64:
+; I486:       # %bb.0:
+; I486-NEXT:    pushl %ebp
+; I486-NEXT:    movl %esp, %ebp
+; I486-NEXT:    pushl %esi
+; I486-NEXT:    andl $-8, %esp
+; I486-NEXT:    subl $48, %esp
+; I486-NEXT:    fldl 8(%ebp)
+; I486-NEXT:    leal fsc64, %eax
+; I486-NEXT:    fstpl {{[0-9]+}}(%esp)
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; I486-NEXT:    movl %esp, %esi
+; I486-NEXT:    movl %edx, 8(%esi)
+; I486-NEXT:    movl %ecx, 4(%esi)
+; I486-NEXT:    movl $2, 12(%esi)
+; I486-NEXT:    movl $fsc64, (%esi)
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    calll __atomic_exchange_8
+; I486-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; I486-NEXT:    leal -4(%ebp), %esp
+; I486-NEXT:    popl %esi
+; I486-NEXT:    popl %ebp
+; I486-NEXT:    retl
    %t1 = atomicrmw xchg double* @fsc64, double %x acquire
    ret void
  }
diff --git a/llvm/test/CodeGen/X86/cmpxchg8b.ll b/llvm/test/CodeGen/X86/cmpxchg8b.ll

index fa8fff8fd7dca0bcc0fb831246c08ac40c9e4b5f..8eb3dda6b6eba6560f8da81a62f483e920d0801d 100644 (file)
--- a/llvm/test/CodeGen/X86/cmpxchg8b.ll
+++ b/llvm/test/CodeGen/X86/cmpxchg8b.ll
@@ -1,6 +1,7 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
  ; RUN: llc < %s -mtriple=i686-unknown- -mcpu=core2 | FileCheck %s --check-prefixes=CHECK,X86
  ; RUN: llc < %s -mtriple=x86_64-unknown- -mcpu=core2 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown- -mcpu=i486 | FileCheck %s --check-prefixes=I486
  
  ; Basic 64-bit cmpxchg
  define void @t1(i64* nocapture %p) nounwind ssp {
@@ -24,6 +25,28 @@ define void @t1(i64* nocapture %p) nounwind ssp {
  ; X64-NEXT:    xorl %eax, %eax
  ; X64-NEXT:    lock cmpxchgq %rcx, (%rdi)
  ; X64-NEXT:    retq
+;
+; I486-LABEL: t1:
+; I486:       # %bb.0: # %entry
+; I486-NEXT:    pushl %ebp
+; I486-NEXT:    movl %esp, %ebp
+; I486-NEXT:    andl $-8, %esp
+; I486-NEXT:    subl $8, %esp
+; I486-NEXT:    movl 8(%ebp), %eax
+; I486-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; I486-NEXT:    movl $0, (%esp)
+; I486-NEXT:    movl %esp, %ecx
+; I486-NEXT:    pushl $5
+; I486-NEXT:    pushl $5
+; I486-NEXT:    pushl $0
+; I486-NEXT:    pushl $1
+; I486-NEXT:    pushl %ecx
+; I486-NEXT:    pushl %eax
+; I486-NEXT:    calll __atomic_compare_exchange_8
+; I486-NEXT:    addl $24, %esp
+; I486-NEXT:    movl %ebp, %esp
+; I486-NEXT:    popl %ebp
+; I486-NEXT:    retl
  entry:
    %r = cmpxchg i64* %p, i64 0, i64 1 seq_cst seq_cst
    ret void
author	Craig Topper <craig.topper@intel.com>
	Wed, 20 Mar 2019 23:35:49 +0000 (23:35 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Wed, 20 Mar 2019 23:35:49 +0000 (23:35 +0000)
llvm/lib/Support/Host.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86.td		patch \| blob \| history
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86InstrCompiler.td		patch \| blob \| history
llvm/lib/Target/X86/X86InstrInfo.td		patch \| blob \| history
llvm/lib/Target/X86/X86Subtarget.h		patch \| blob \| history
llvm/test/CodeGen/X86/atomic64.ll		patch \| blob \| history
llvm/test/CodeGen/X86/cmpxchg8b.ll		patch \| blob \| history