Generic atomic cmpxchg. Expects a MachineMemOperand in addition to explicit
operands.
-G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_NAND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_FADD, G_ATOMICRMW_FSUB
+G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
+G_ATOMICRMW_NAND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MAX,
+G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_FADD,
+G_ATOMICRMW_FSUB, G_ATOMICRMW_FMAX, G_ATOMICRMW_FMIN
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Generic atomicrmw. Expects a MachineMemOperand in addition to explicit
- umin
- fadd
- fsub
+- fmax
+- fmin
For most of these operations, the type of '<value>' must be an integer
type whose bit width is a power of two greater than or equal to eight
and less than or equal to a target-specific size limit. For xchg, this
may also be a floating point or a pointer type with the same size constraints
-as integers. For fadd/fsub, this must be a floating point type. The
+as integers. For fadd/fsub/fmax/fmin, this must be a floating point type. The
type of the '``<pointer>``' operand must be a pointer to that type. If
the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not
allowed to modify the number or order of execution of this
- umin: ``*ptr = *ptr < val ? *ptr : val`` (using an unsigned comparison)
- fadd: ``*ptr = *ptr + val`` (using floating point arithmetic)
- fsub: ``*ptr = *ptr - val`` (using floating point arithmetic)
+- fmax: ``*ptr = maxnum(*ptr, val)`` (matches the ``llvm.maxnum.*`` intrinsic)
+- fmin: ``*ptr = minnum(*ptr, val)`` (matches the ``llvm.minnum.*`` intrinsic)
Example:
""""""""
* ``sdiv``
* ``urem``
* ``srem``
+* Added the support for ``fmax`` and ``fmin`` in ``atomicrmw`` instruction. The
+ comparison is expected to match the behavior of ``llvm.maxnum.*`` and
+ ``llvm.minnum.*`` respectively.
Changes to building LLVM
------------------------
the old one */
LLVMAtomicRMWBinOpFAdd, /**< Add a floating point value and return the
old one */
- LLVMAtomicRMWBinOpFSub /**< Subtract a floating point value and return the
- old one */
+ LLVMAtomicRMWBinOpFSub, /**< Subtract a floating point value and return the
+ old one */
+  LLVMAtomicRMWBinOpFMax, /**< Sets the value if it's greater than the
+                               original using a floating point comparison and
+                               return the old one */
+  LLVMAtomicRMWBinOpFMin, /**< Sets the value if it's smaller than the
+                               original using a floating point comparison and
+                               return the old one */
} LLVMAtomicRMWBinOp;
typedef enum {
kw_min,
kw_umax,
kw_umin,
+ kw_fmax,
+ kw_fmin,
// Instruction Opcodes (Opcode in UIntVal).
kw_fneg,
RMW_UMAX = 9,
RMW_UMIN = 10,
RMW_FADD = 11,
- RMW_FSUB = 12
+ RMW_FSUB = 12,
+ RMW_FMAX = 13,
+ RMW_FMIN = 14
};
/// OverflowingBinaryOperatorOptionalFlags - Flags for serializing
const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
MachineMemOperand &MMO);
+ /// Build and insert `OldValRes<def> = G_ATOMICRMW_FMAX Addr, Val, MMO`.
+ ///
+ /// Atomically replace the value at \p Addr with the floating point maximum of
+ /// \p Val and the original value. Puts the original value from \p Addr in \p
+ /// OldValRes.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p OldValRes must be a generic virtual register.
+ /// \pre \p Addr must be a generic virtual register with pointer type.
+ /// \pre \p OldValRes, and \p Val must be generic virtual registers of the
+ /// same type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildAtomicRMWFMax(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO);
+
+ /// Build and insert `OldValRes<def> = G_ATOMICRMW_FMIN Addr, Val, MMO`.
+ ///
+ /// Atomically replace the value at \p Addr with the floating point minimum of
+ /// \p Val and the original value. Puts the original value from \p Addr in \p
+ /// OldValRes.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p OldValRes must be a generic virtual register.
+ /// \pre \p Addr must be a generic virtual register with pointer type.
+ /// \pre \p OldValRes, and \p Val must be generic virtual registers of the
+ /// same type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildAtomicRMWFMin(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO);
+
/// Build and insert `G_FENCE Ordering, Scope`.
MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope);
ATOMIC_LOAD_UMAX,
ATOMIC_LOAD_FADD,
ATOMIC_LOAD_FSUB,
+ ATOMIC_LOAD_FMAX,
+ ATOMIC_LOAD_FMIN,
// Masked load and store - consecutive vector load and store operations
// with additional mask operand that prevents memory accesses to the
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD_FADD:
case ISD::ATOMIC_LOAD_FSUB:
+ case ISD::ATOMIC_LOAD_FMAX:
+ case ISD::ATOMIC_LOAD_FMIN:
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE:
case ISD::MLOAD:
N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FMAX ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE;
}
/// *p = old - v
FSub,
+ /// *p = maxnum(old, v)
+ /// \p maxnum matches the behavior of \p llvm.maxnum.*.
+ FMax,
+
+ /// *p = minnum(old, v)
+ /// \p minnum matches the behavior of \p llvm.minnum.*.
+ FMin,
+
FIRST_BINOP = Xchg,
- LAST_BINOP = FSub,
+ LAST_BINOP = FMin,
BAD_BINOP
};
switch (Op) {
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
return true;
default:
return false;
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_FADD)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_FSUB)
+HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMAX)
+HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMIN)
// Generic atomic fence
HANDLE_TARGET_OPCODE(G_FENCE)
def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP;
def G_ATOMICRMW_FADD : G_ATOMICRMW_OP;
def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP;
+def G_ATOMICRMW_FMAX : G_ATOMICRMW_OP;
+def G_ATOMICRMW_FMIN : G_ATOMICRMW_OP;
def G_FENCE : GenericInstruction {
let OutOperandList = (outs);
def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax>;
def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd>;
def : GINodeEquiv<G_ATOMICRMW_FSUB, atomic_load_fsub>;
+def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax>;
+def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>;
def : GINodeEquiv<G_FENCE, atomic_fence>;
// Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern.
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_fsub : SDNode<"ISD::ATOMIC_LOAD_FSUB" , SDTFPAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def atomic_load_fmax : SDNode<"ISD::ATOMIC_LOAD_FMAX", SDTFPAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def atomic_load_fmin : SDNode<"ISD::ATOMIC_LOAD_FMIN", SDTFPAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
- KEYWORD(umin);
+ KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
KEYWORD(vscale);
KEYWORD(x);
Operation = AtomicRMWInst::FSub;
IsFP = true;
break;
+ case lltok::kw_fmax:
+ Operation = AtomicRMWInst::FMax;
+ IsFP = true;
+ break;
+ case lltok::kw_fmin:
+ Operation = AtomicRMWInst::FMin;
+ IsFP = true;
+ break;
}
Lex.Lex(); // Eat the operation.
case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
case bitc::RMW_FADD: return AtomicRMWInst::FAdd;
case bitc::RMW_FSUB: return AtomicRMWInst::FSub;
+ case bitc::RMW_FMAX: return AtomicRMWInst::FMax;
+ case bitc::RMW_FMIN: return AtomicRMWInst::FMin;
}
}
case AtomicRMWInst::UMin: return bitc::RMW_UMIN;
case AtomicRMWInst::FAdd: return bitc::RMW_FADD;
case AtomicRMWInst::FSub: return bitc::RMW_FSUB;
+ case AtomicRMWInst::FMax: return bitc::RMW_FMAX;
+ case AtomicRMWInst::FMin: return bitc::RMW_FMIN;
}
}
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
// No atomic libcalls are available for max/min/umax/umin.
case AtomicRMWInst::FSub:
Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
break;
+ case AtomicRMWInst::FMax:
+ Opcode = TargetOpcode::G_ATOMICRMW_FMAX;
+ break;
+ case AtomicRMWInst::FMin:
+ Opcode = TargetOpcode::G_ATOMICRMW_FMIN;
+ break;
}
MIRBuilder.buildAtomicRMW(
}
MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFMax(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMAX, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFMin(const DstOp &OldValRes, const SrcOp &Addr,
+ const SrcOp &Val, MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FMIN, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
return buildInstr(TargetOpcode::G_FENCE)
.addImm(Ordering)
Opcode == ISD::ATOMIC_LOAD_UMAX ||
Opcode == ISD::ATOMIC_LOAD_FADD ||
Opcode == ISD::ATOMIC_LOAD_FSUB ||
+ Opcode == ISD::ATOMIC_LOAD_FMAX ||
+ Opcode == ISD::ATOMIC_LOAD_FMIN ||
Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
+ case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
+ case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
llvm_unreachable("Unsupported atomic update operation");
}
llvm_unreachable("Unsupported atomic update operation");
case LLVMAtomicRMWBinOpUMin: return AtomicRMWInst::UMin;
case LLVMAtomicRMWBinOpFAdd: return AtomicRMWInst::FAdd;
case LLVMAtomicRMWBinOpFSub: return AtomicRMWInst::FSub;
+ case LLVMAtomicRMWBinOpFMax: return AtomicRMWInst::FMax;
+ case LLVMAtomicRMWBinOpFMin: return AtomicRMWInst::FMin;
}
llvm_unreachable("Invalid LLVMAtomicRMWBinOp value!");
case AtomicRMWInst::UMin: return LLVMAtomicRMWBinOpUMin;
case AtomicRMWInst::FAdd: return LLVMAtomicRMWBinOpFAdd;
case AtomicRMWInst::FSub: return LLVMAtomicRMWBinOpFSub;
+ case AtomicRMWInst::FMax: return LLVMAtomicRMWBinOpFMax;
+ case AtomicRMWInst::FMin: return LLVMAtomicRMWBinOpFMin;
default: break;
}
return "fadd";
case AtomicRMWInst::FSub:
return "fsub";
+ case AtomicRMWInst::FMax:
+ return "fmax";
+ case AtomicRMWInst::FMin:
+ return "fmin";
case AtomicRMWInst::BAD_BINOP:
return "<invalid operation>";
}
case AtomicRMWInst::Nand:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
return AtomicExpansionKind::CmpXChg;
default:
return AtomicExpansionKind::None;
return bitc::RMW_FADD;
case AtomicRMWInst::FSub:
return bitc::RMW_FSUB;
+ case AtomicRMWInst::FMax:
+ return bitc::RMW_FMAX;
+ case AtomicRMWInst::FMin:
+ return bitc::RMW_FMIN;
}
}
case AtomicRMWInst::UMin:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
+ case AtomicRMWInst::FMax:
+ case AtomicRMWInst::FMin:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
return AtomicExpansionKind::CmpXChg;
/// equivalent to its value operand.
bool isSaturating(AtomicRMWInst& RMWI) {
if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand()))
- switch(RMWI.getOperation()) {
+ switch (RMWI.getOperation()) {
+ case AtomicRMWInst::FMax:
+ // maxnum(x, +inf) -> +inf
+ return !CF->isNegative() && CF->isInfinity();
+ case AtomicRMWInst::FMin:
+      // minnum(x, -inf) -> -inf
+ return CF->isNegative() && CF->isInfinity();
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
return CF->isNaN();
return Builder.CreateFAdd(Loaded, Inc, "new");
case AtomicRMWInst::FSub:
return Builder.CreateFSub(Loaded, Inc, "new");
+ case AtomicRMWInst::FMax:
+ return Builder.CreateMaxNum(Loaded, Inc);
+ case AtomicRMWInst::FMin:
+ return Builder.CreateMinNum(Loaded, Inc);
default:
llvm_unreachable("Unknown atomic op");
}
; CHECK: atomicrmw volatile fadd float* %x, float 1.000000e+00 seq_cst
atomicrmw volatile fadd float* %x, float 1.0 seq_cst
+ ; CHECK: atomicrmw fmax float* %x, float 1.000000e+00 seq_cst
+ atomicrmw fmax float* %x, float 1.0 seq_cst
+
+ ; CHECK: atomicrmw volatile fmax float* %x, float 1.000000e+00 seq_cst
+ atomicrmw volatile fmax float* %x, float 1.0 seq_cst
+
+ ; CHECK: atomicrmw fmin float* %x, float 1.000000e+00 seq_cst
+ atomicrmw fmin float* %x, float 1.0 seq_cst
+
+ ; CHECK: atomicrmw volatile fmin float* %x, float 1.000000e+00 seq_cst
+ atomicrmw volatile fmin float* %x, float 1.0 seq_cst
+
ret void
}
; CHECK: %atomicrmw.fsub = atomicrmw fsub float* %word, float 1.000000e+00 monotonic
%atomicrmw.fsub = atomicrmw fsub float* %word, float 1.0 monotonic
+; CHECK: %atomicrmw.fmax = atomicrmw fmax float* %word, float 1.000000e+00 monotonic
+ %atomicrmw.fmax = atomicrmw fmax float* %word, float 1.0 monotonic
+
+; CHECK: %atomicrmw.fmin = atomicrmw fmin float* %word, float 1.000000e+00 monotonic
+ %atomicrmw.fmin = atomicrmw fmin float* %word, float 1.0 monotonic
+
ret void
}
# DEBUG-NEXT: G_ATOMICRMW_FSUB (opcode {{[0-9]+}}): 2 type indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_ATOMICRMW_FMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_ATOMICRMW_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FENCE (opcode {{[0-9]+}}): 0 type indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_VECREDUCE_OR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_VECREDUCE_XOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
ret i32 %res
}
+; CHECK-LABEL: sat_fmax_inf
+; CHECK-NEXT: %res = atomicrmw xchg double* %addr, double 0x7FF0000000000000 monotonic
+; CHECK-NEXT: ret double %res
+define double @sat_fmax_inf(double* %addr) {
+ %res = atomicrmw fmax double* %addr, double 0x7FF0000000000000 monotonic
+ ret double %res
+}
+; CHECK-LABEL: no_sat_fmax_inf
+; CHECK-NEXT: %res = atomicrmw fmax double* %addr, double 1.000000e-01 monotonic
+; CHECK-NEXT: ret double %res
+define double @no_sat_fmax_inf(double* %addr) {
+ %res = atomicrmw fmax double* %addr, double 1.000000e-01 monotonic
+ ret double %res
+}
+; CHECK-LABEL: sat_fmin_inf
+; CHECK-NEXT: %res = atomicrmw xchg double* %addr, double 0xFFF0000000000000 monotonic
+; CHECK-NEXT: ret double %res
+define double @sat_fmin_inf(double* %addr) {
+ %res = atomicrmw fmin double* %addr, double 0xFFF0000000000000 monotonic
+ ret double %res
+}
+
+; CHECK-LABEL: no_sat_fmin_inf
+; CHECK-NEXT: %res = atomicrmw fmin double* %addr, double 1.000000e-01 monotonic
+; CHECK-NEXT: ret double %res
+define double @no_sat_fmin_inf(double* %addr) {
+ %res = atomicrmw fmin double* %addr, double 1.000000e-01 monotonic
+ ret double %res
+}
ret float %j
; CHECK: ret float [[INST]]
}
+
+define float @fmax() {
+; CHECK-LABEL: @fmax(
+ %i = alloca float
+ %j = atomicrmw fmax float* %i, float 42.0 monotonic
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: call float @llvm.maxnum.f32
+; CHECK-NEXT: store
+ ret float %j
+; CHECK: ret float [[INST]]
+}
+
+define float @fmin() {
+; CHECK-LABEL: @fmin(
+ %i = alloca float
+ %j = atomicrmw fmin float* %i, float 42.0 monotonic
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: call float @llvm.minnum.f32
+; CHECK-NEXT: store
+ ret float %j
+; CHECK: ret float [[INST]]
+}