[clang] Add support for __builtin_memset_inline

author Guillaume Chatelet <gchatelet@google.com>

Tue, 7 Jun 2022 09:51:32 +0000 (09:51 +0000)

committer Guillaume Chatelet <gchatelet@google.com>

Fri, 10 Jun 2022 13:13:59 +0000 (13:13 +0000)
author Guillaume Chatelet <gchatelet@google.com>
Tue, 7 Jun 2022 09:51:32 +0000 (09:51 +0000)
committer Guillaume Chatelet <gchatelet@google.com>
Fri, 10 Jun 2022 13:13:59 +0000 (13:13 +0000)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst

index 44848a2..3e4108b 100644 (file)
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -3218,6 +3218,26 @@ Note that the `size` argument must be a compile time constant.
  
  Note that this intrinsic cannot yet be called in a ``constexpr`` context.
  
+Guaranteed inlined memset
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c
+
+  void __builtin_memset_inline(void *dst, int value, size_t size);
+
+
+``__builtin_memset_inline`` has been designed as a building block for efficient
+``memset`` implementations. It is identical to ``__builtin_memset`` but also
+guarantees not to call any external functions. See LLVM IR `llvm.memset.inline
+<https://llvm.org/docs/LangRef.html#llvm-memset-inline-intrinsic>`_ intrinsic
+for more information.
+
+This is useful to implement a custom version of ``memset``, implement a
+``libc`` memset or work around the absence of a ``libc``.
+
+Note that the `size` argument must be a compile time constant.
+
+Note that this intrinsic cannot yet be called in a ``constexpr`` context.
  
  Atomic Min/Max builtins with memory ordering
  --------------------------------------------
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def

index 173431c..c084cc2 100644 (file)
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -559,6 +559,7 @@ BUILTIN(__builtin_memcpy_inline, "vv*vC*Iz", "n")
  BUILTIN(__builtin_memmove, "v*v*vC*z", "nF")
  BUILTIN(__builtin_mempcpy, "v*v*vC*z", "nF")
  BUILTIN(__builtin_memset, "v*v*iz", "nF")
+BUILTIN(__builtin_memset_inline, "vv*iIz", "n")
  BUILTIN(__builtin_printf, "icC*.", "Fp:0:")
  BUILTIN(__builtin_stpcpy, "c*c*cC*", "nF")
  BUILTIN(__builtin_stpncpy, "c*c*cC*z", "nF")
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h

index 68618df..2fcfea6 100644 (file)
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -344,6 +344,14 @@ public:
                          Dest.getAlignment().getAsAlign(), IsVolatile);
    }
  
+  using CGBuilderBaseTy::CreateMemSetInline;
+  llvm::CallInst *CreateMemSetInline(Address Dest, llvm::Value *Value,
+                                     uint64_t Size) {
+    return CreateMemSetInline(Dest.getPointer(),
+                              Dest.getAlignment().getAsAlign(), Value,
+                              getInt64(Size));
+  }
+
    using CGBuilderBaseTy::CreatePreserveStructAccessIndex;
    Address CreatePreserveStructAccessIndex(Address Addr, unsigned Index,
                                            unsigned FieldIndex,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp

index 9d2e6df..c67df4d 100644 (file)
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3508,6 +3508,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
      Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
      return RValue::get(Dest.getPointer());
    }
+  case Builtin::BI__builtin_memset_inline: {
+    Address Dest = EmitPointerWithAlignment(E->getArg(0));
+    Value *ByteVal =
+        Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
+    uint64_t Size =
+        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
+    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
+                        E->getArg(0)->getExprLoc(), FD, 0);
+    Builder.CreateMemSetInline(Dest, ByteVal, Size);
+    return RValue::get(nullptr);
+  }
    case Builtin::BI__builtin___memset_chk: {
      // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
      Expr::EvalResult SizeResult, DstSizeResult;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp

index 657238e..24bcb0d 100644 (file)
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2277,6 +2277,17 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
      }
      break;
    }
+  case Builtin::BI__builtin_memset_inline: {
+    clang::Expr *SizeOp = TheCall->getArg(2);
+    // We warn about filling to `nullptr` pointers when `size` is greater than
+    // 0. When `size` is value dependent we cannot evaluate its value so we bail
+    // out.
+    if (SizeOp->isValueDependent())
+      break;
+    if (!SizeOp->EvaluateKnownConstInt(Context).isZero())
+      CheckNonNullArgument(*this, TheCall->getArg(0), TheCall->getExprLoc());
+    break;
+  }
  #define BUILTIN(ID, TYPE, ATTRS)
  #define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
    case Builtin::BI##ID: \
diff --git a/clang/test/CodeGen/builtins-memset-inline.c b/clang/test/CodeGen/builtins-memset-inline.c

new file mode 100644 (file)

index 0000000..0647186
--- /dev/null
+++ b/clang/test/CodeGen/builtins-memset-inline.c
@@ -0,0 +1,21 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: define{{.*}} void @test_memset_inline_0(i8* noundef %dst, i8 noundef signext %value)
+void test_memset_inline_0(void *dst, char value) {
+  // CHECK:    call void @llvm.memset.inline.p0i8.i64(i8* align 1 %0, i8 %2, i64 0, i1 false)
+  __builtin_memset_inline(dst, value, 0);
+}
+
+// CHECK-LABEL: define{{.*}} void @test_memset_inline_1(i8* noundef %dst, i8 noundef signext %value)
+void test_memset_inline_1(void *dst, char value) {
+  // CHECK:    call void @llvm.memset.inline.p0i8.i64(i8* align 1 %0, i8 %2, i64 1, i1 false)
+  __builtin_memset_inline(dst, value, 1);
+}
+
+// CHECK-LABEL: define{{.*}} void @test_memset_inline_4(i8* noundef %dst, i8 noundef signext %value)
+void test_memset_inline_4(void *dst, char value) {
+  // CHECK:    call void @llvm.memset.inline.p0i8.i64(i8* align 1 %0, i8 %2, i64 4, i1 false)
+  __builtin_memset_inline(dst, value, 4);
+}
diff --git a/clang/test/Sema/builtins-memset-inline.cpp b/clang/test/Sema/builtins-memset-inline.cpp

new file mode 100644 (file)

index 0000000..e445b3b
--- /dev/null
+++ b/clang/test/Sema/builtins-memset-inline.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define NULL ((char *)0)
+
+#if __has_builtin(__builtin_memset_inline)
+#warning defined as expected
+// expected-warning@-1 {{defined as expected}}
+#endif
+
+void test_memset_inline_invalid_arg_types() {
+  __builtin_memset_inline(1, 2, 3); // expected-error {{cannot initialize a parameter of type 'void *' with an rvalue of type 'int'}}
+}
+
+void test_memset_inline_null_dst(void *ptr) {
+  __builtin_memset_inline(NULL, 1, 4); // expected-warning {{null passed to a callee that requires a non-null argument}}
+}
+
+void test_memset_inline_null_buffer_is_ok_if_size_is_zero(void *ptr, char value) {
+  __builtin_memset_inline(NULL, value, /*size */ 0);
+}
+
+void test_memset_inline_non_constant_size(void *dst, char value, unsigned size) {
+  __builtin_memset_inline(dst, value, size); // expected-error {{argument to '__builtin_memset_inline' must be a constant integer}}
+}
+
+template <unsigned size>
+void test_memset_inline_template(void *dst, char value) {
+  // we do not try to evaluate size in non-instantiated templates.
+  __builtin_memset_inline(dst, value, size);
+}
+
+void test_memset_inline_implicit_conversion(void *ptr, char value) {
+  char a[5];
+  __builtin_memset_inline(a, value, 5);
+}
+
+void test_memset_inline_num_args(void *dst, char value) {
+  __builtin_memset_inline();                    // expected-error {{too few arguments to function call}}
+  __builtin_memset_inline(dst, value, 4, NULL); // expected-error {{too many arguments to function call}}
+}
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst

index 7b5b4fc..d50aac1 100644 (file)
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -13867,6 +13867,71 @@ If ``<len>`` is not a well-defined value, the behavior is undefined.
  If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the
  behavior is undefined.
  
+.. _int_memset_inline:
+
+'``llvm.memset.inline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memset.inline`` on any
+integer bit width and for different address spaces. Not all targets
+support all bit widths however.
+
+::
+
+      declare void @llvm.memset.inline.p0i8.i32(i8* <dest>, i8 <val>,
+                                                i32 <len>,
+                                                i1 <isvolatile>)
+      declare void @llvm.memset.inline.p0i8.i64(i8* <dest>, i8 <val>,
+                                                i64 <len>,
+                                                i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memset.inline.*``' intrinsics fill a block of memory with a
+particular byte value and guarantee that no external functions are called.
+
+Note that, unlike the standard libc function, the ``llvm.memset.inline.*``
+intrinsics do not return a value, take an extra isvolatile argument and the
+pointer can be in specified address spaces.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination to fill, the second
+is the byte value with which to fill it, the third argument is a constant
+integer argument specifying the number of bytes to fill, and the fourth
+is a boolean indicating a volatile access.
+
+The :ref:`align <attr_align>` parameter attribute can be provided
+for the first argument.
+
+If the ``isvolatile`` parameter is ``true``, the ``llvm.memset.inline`` call is
+a :ref:`volatile operation <volatile>`. The detailed access behavior is not
+very cleanly specified and it is unwise to depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memset.inline.*``' intrinsics fill "len" bytes of memory starting
+at the destination location. If the argument is known to be
+aligned to some boundary, this can be specified as an attribute on
+the argument.
+
+``len`` must be a constant expression.
+If ``<len>`` is 0, it is no-op modulo the behavior of attributes attached to
+the arguments.
+If ``<len>`` is not a well-defined value, the behavior is undefined.
+If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the
+behavior is undefined.
+
+The behavior of '``llvm.memset.inline.*``' is equivalent to the behavior of
+'``llvm.memset.*``', but the generated code is guaranteed not to call any
+external functions.
+
  '``llvm.sqrt.*``' Intrinsic
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h

index 8722e89..b647725 100644 (file)
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1052,7 +1052,8 @@ public:
                       const AAMDNodes &AAInfo = AAMDNodes());
  
    SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
-                    SDValue Size, Align Alignment, bool isVol, bool isTailCall,
+                    SDValue Size, Align Alignment, bool isVol,
+                    bool AlwaysInline, bool isTailCall,
                      MachinePointerInfo DstPtrInfo,
                      const AAMDNodes &AAInfo = AAMDNodes());
  
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h

index 722c327..e7d6089 100644 (file)
--- a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -76,11 +76,13 @@ public:
    /// that don't fit the target's parameters for simple stores and can be more
    /// efficient than using a library call. This function can return a null
    /// SDValue if the target declines to use custom code and a different
-  /// lowering strategy should be used.
+  /// lowering strategy should be used. If AlwaysInline is true the target must
+  /// not emit a library call; a null SDValue makes the caller expand to stores.
    virtual SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
                                            SDValue Chain, SDValue Op1,
                                            SDValue Op2, SDValue Op3,
                                            Align Alignment, bool isVolatile,
+                                          bool AlwaysInline,
                                            MachinePointerInfo DstPtrInfo) const {
      return SDValue();
    }
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h

index 8ad6c4d..4cc7fe9 100644 (file)
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3539,6 +3539,7 @@ public:
  
    /// Determines the optimal series of memory ops to replace the memset / memcpy.
    /// Return true if the number of memory ops is below the threshold (Limit).
+  /// Note that this is always the case when Limit is ~0.
    /// It returns the types of the sequence of memory ops to perform
    /// memset / memcpy by reference.
    virtual bool
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h

index 659960f..2e0c6a4 100644 (file)
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -578,6 +578,12 @@ public:
                           MDNode *ScopeTag = nullptr,
                           MDNode *NoAliasTag = nullptr);
  
+  CallInst *CreateMemSetInline(Value *Dst, MaybeAlign DstAlign, Value *Val,
+                               Value *Size, bool IsVolatile = false,
+                               MDNode *TBAATag = nullptr,
+                               MDNode *ScopeTag = nullptr,
+                               MDNode *NoAliasTag = nullptr);
+
    /// Create and insert an element unordered-atomic memset of the region of
    /// memory starting at the given pointer to the given value.
    ///
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h

index e494fca..06d2335 100644 (file)
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -973,6 +973,7 @@ public:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
      case Intrinsic::memset:
+    case Intrinsic::memset_inline:
      case Intrinsic::memcpy_inline:
        return true;
      default:
@@ -984,12 +985,33 @@ public:
    }
  };
  
-/// This class wraps the llvm.memset intrinsic.
+/// This class wraps the llvm.memset and llvm.memset.inline intrinsics.
  class MemSetInst : public MemSetBase<MemIntrinsic> {
  public:
    // Methods for support type inquiry through isa, cast, and dyn_cast:
    static bool classof(const IntrinsicInst *I) {
-    return I->getIntrinsicID() == Intrinsic::memset;
+    switch (I->getIntrinsicID()) {
+    case Intrinsic::memset:
+    case Intrinsic::memset_inline:
+      return true;
+    default:
+      return false;
+    }
+  }
+  static bool classof(const Value *V) {
+    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+  }
+};
+
+/// This class wraps the llvm.memset.inline intrinsic.
+class MemSetInlineInst : public MemSetInst {
+public:
+  ConstantInt *getLength() const {
+    return cast<ConstantInt>(MemSetInst::getLength());
+  }
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const IntrinsicInst *I) {
+    return I->getIntrinsicID() == Intrinsic::memset_inline;
    }
    static bool classof(const Value *V) {
      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
@@ -1074,6 +1096,7 @@ public:
      case Intrinsic::memcpy_inline:
      case Intrinsic::memmove:
      case Intrinsic::memset:
+    case Intrinsic::memset_inline:
      case Intrinsic::memcpy_element_unordered_atomic:
      case Intrinsic::memmove_element_unordered_atomic:
      case Intrinsic::memset_element_unordered_atomic:
@@ -1095,6 +1118,7 @@ public:
    static bool classof(const IntrinsicInst *I) {
      switch (I->getIntrinsicID()) {
      case Intrinsic::memset:
+    case Intrinsic::memset_inline:
      case Intrinsic::memset_element_unordered_atomic:
        return true;
      default:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td

index c40f0d4..1b1cfd4 100644 (file)
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -651,6 +651,17 @@ def int_memset  : Intrinsic<[],
                               NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
                               ImmArg<ArgIndex<3>>]>;
  
+// Memset version that is guaranteed to be inlined.
+// In particular this means that the generated code is not allowed to call any
+// external function.
+// The third argument (specifying the size) must be a constant.
+def int_memset_inline
+    : Intrinsic<[],
+      [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty],
+      [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree,
+       NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>, 
+       ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+
  // FIXME: Add version of these floating point intrinsics which allow non-default
  // rounding modes and FP exception handling.
  
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp

index 291fea8..1f46a21 100644 (file)
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -335,6 +335,12 @@ void Lint::visitCallBase(CallBase &I) {
                             MSI->getDestAlign(), nullptr, MemRef::Write);
        break;
      }
+    case Intrinsic::memset_inline: {
+      MemSetInlineInst *MSII = cast<MemSetInlineInst>(&I);
+      visitMemoryReference(I, MemoryLocation::getForDest(MSII),
+                           MSII->getDestAlign(), nullptr, MemRef::Write);
+      break;
+    }
  
      case Intrinsic::vastart:
        Check(I.getParent()->getParent()->isVarArg(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index efb86e1..314aa7c 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6987,17 +6987,18 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
  /// \param Size Number of bytes to write.
  /// \param Alignment Alignment of the destination in bytes.
  /// \param isVol True if destination is volatile.
+/// \param AlwaysInline Makes sure no function call is generated.
  /// \param DstPtrInfo IR information on the memory pointer.
  /// \returns New head in the control flow, if lowering was successful, empty
  /// SDValue otherwise.
  ///
  /// The function tries to replace 'llvm.memset' intrinsic with several store
  /// operations and value calculation code. This is usually profitable for small
-/// memory size.
+/// memory size or when the semantics require inlining.
  static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
                                 SDValue Chain, SDValue Dst, SDValue Src,
                                 uint64_t Size, Align Alignment, bool isVol,
-                               MachinePointerInfo DstPtrInfo,
+                               bool AlwaysInline, MachinePointerInfo DstPtrInfo,
                                 const AAMDNodes &AAInfo) {
    // Turn a memset of undef to nop.
    // FIXME: We need to honor volatile even is Src is undef.
@@ -7017,8 +7018,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
      DstAlignCanChange = true;
    bool IsZeroVal =
        isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
+  unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize);
+
    if (!TLI.findOptimalMemOpLowering(
-          MemOps, TLI.getMaxStoresPerMemset(OptSize),
+          MemOps, Limit,
            MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
            DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
      return SDValue();
@@ -7314,7 +7317,7 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
  
  SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
                                  SDValue Src, SDValue Size, Align Alignment,
-                                bool isVol, bool isTailCall,
+                                bool isVol, bool AlwaysInline, bool isTailCall,
                                  MachinePointerInfo DstPtrInfo,
                                  const AAMDNodes &AAInfo) {
    // Check to see if we should lower the memset to stores first.
@@ -7327,7 +7330,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
  
      SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
                                       ConstantSize->getZExtValue(), Alignment,
-                                     isVol, DstPtrInfo, AAInfo);
+                                     isVol, false, DstPtrInfo, AAInfo);
  
      if (Result.getNode())
        return Result;
@@ -7337,11 +7340,23 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
    // code. If the target chooses to do this, this is the next best.
    if (TSI) {
      SDValue Result = TSI->EmitTargetCodeForMemset(
-        *this, dl, Chain, Dst, Src, Size, Alignment, isVol, DstPtrInfo);
+        *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo);
      if (Result.getNode())
        return Result;
    }
  
+  // If we really need inline code and the target declined to provide it,
+  // use a (potentially long) sequence of stores.
+  if (AlwaysInline) {
+    assert(ConstantSize && "AlwaysInline requires a constant size!");
+    SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
+                                     ConstantSize->getZExtValue(), Alignment,
+                                     isVol, true, DstPtrInfo, AAInfo);
+    assert(Result &&
+           "getMemsetStores must return a valid sequence when AlwaysInline");
+    return Result;
+  }
+
    checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
  
    // Emit a library call.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index a1293d3..1da07dd 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5909,10 +5909,28 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
      bool isVol = MSI.isVolatile();
      bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
      SDValue Root = isVol ? getRoot() : getMemoryRoot();
-    SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
+    SDValue MS = DAG.getMemset(
+        Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false,
+        isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
+    updateDAGForMaybeTailCall(MS);
+    return;
+  }
+  case Intrinsic::memset_inline: {
+    const auto &MSII = cast<MemSetInlineInst>(I);
+    SDValue Dst = getValue(I.getArgOperand(0));
+    SDValue Value = getValue(I.getArgOperand(1));
+    SDValue Size = getValue(I.getArgOperand(2));
+    assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size");
+    // @llvm.memset defines 0 and 1 to both mean no alignment.
+    Align DstAlign = MSII.getDestAlign().valueOrOne();
+    bool isVol = MSII.isVolatile();
+    bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+    SDValue Root = isVol ? getRoot() : getMemoryRoot();
+    SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol,
+                               /* AlwaysInline */ true, isTC,
                                 MachinePointerInfo(I.getArgOperand(0)),
                                 I.getAAMetadata());
-    updateDAGForMaybeTailCall(MS);
+    updateDAGForMaybeTailCall(MC);
      return;
    }
    case Intrinsic::memmove: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index 5cf5aa9..1bd8326 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -196,7 +196,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
  bool TargetLowering::findOptimalMemOpLowering(
      std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
      unsigned SrcAS, const AttributeList &FuncAttributes) const {
-  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
+  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
+      Op.getSrcAlign() < Op.getDstAlign())
      return false;
  
    EVT VT = getOptimalMemOpType(Op, FuncAttributes);
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp

index 622ed40..d0c622f 100644 (file)
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -164,6 +164,35 @@ CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size,
    return CI;
  }
  
+CallInst *IRBuilderBase::CreateMemSetInline(Value *Dst, MaybeAlign DstAlign,
+                                            Value *Val, Value *Size,
+                                            bool IsVolatile, MDNode *TBAATag,
+                                            MDNode *ScopeTag,
+                                            MDNode *NoAliasTag) {
+  Dst = getCastedInt8PtrValue(Dst);
+  Value *Ops[] = {Dst, Val, Size, getInt1(IsVolatile)};
+  Type *Tys[] = {Dst->getType(), Size->getType()};
+  Module *M = BB->getParent()->getParent();
+  Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset_inline, Tys);
+
+  CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+  if (DstAlign)
+    cast<MemSetInlineInst>(CI)->setDestAlignment(*DstAlign);
+
+  // Set the TBAA info if present.
+  if (TBAATag)
+    CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+  if (ScopeTag)
+    CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
+
+  if (NoAliasTag)
+    CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+
+  return CI;
+}
+
  CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
      Value *Ptr, Value *Val, Value *Size, Align Alignment, uint32_t ElementSize,
      MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) {
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp

index 4952279..0df0c32 100644 (file)
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4917,7 +4917,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
    case Intrinsic::memcpy:
    case Intrinsic::memcpy_inline:
    case Intrinsic::memmove:
-  case Intrinsic::memset: {
+  case Intrinsic::memset:
+  case Intrinsic::memset_inline: {
      const auto *MI = cast<MemIntrinsic>(&Call);
      auto IsValidAlignment = [&](unsigned Alignment) -> bool {
        return Alignment == 0 || isPowerOf2_32(Alignment);
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp

index 07494c4..677797a 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -91,7 +91,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy(
  
  SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
      SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
-    SDValue Size, Align Alignment, bool isVolatile,
+    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
      MachinePointerInfo DstPtrInfo) const {
    const AArch64Subtarget &STI =
        DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h

index 47fe3bf..73f9372 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -34,7 +34,7 @@ public:
    SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
                                    SDValue Chain, SDValue Dst, SDValue Src,
                                    SDValue Size, Align Alignment,
-                                  bool isVolatile,
+                                  bool isVolatile, bool AlwaysInline,
                                    MachinePointerInfo DstPtrInfo) const override;
    SDValue
    EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp

index 12d4ad8..3795217 100644 (file)
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -296,7 +296,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
  
  SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
      SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
-    SDValue Size, Align Alignment, bool isVolatile,
+    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
      MachinePointerInfo DstPtrInfo) const {
  
    const ARMSubtarget &Subtarget =
@@ -314,6 +314,9 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
                         DAG.getZExtOrTrunc(Size, dl, MVT::i32));
    }
  
-  return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
-                                Alignment.value(), RTLIB::MEMSET);
+  if (!AlwaysInline)
+    return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size,
+                                  Alignment.value(), RTLIB::MEMSET);
+
+  return SDValue();
  }
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h

index 7aa831c..ffa8b50 100644 (file)
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -55,6 +55,7 @@ public:
    SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
                                    SDValue Chain, SDValue Op1, SDValue Op2,
                                    SDValue Op3, Align Alignment, bool isVolatile,
+                                  bool AlwaysInline,
                                    MachinePointerInfo DstPtrInfo) const override;
  
    SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl,
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

index 32305af..9a1be95 100644 (file)
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1000,13 +1000,15 @@ bool SystemZTargetLowering::findOptimalMemOpLowering(
      unsigned SrcAS, const AttributeList &FuncAttributes) const {
    const int MVCFastLen = 16;
  
-  // Don't expand Op into scalar loads/stores in these cases:
-  if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
-    return false;  // Small memcpy: Use MVC
-  if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
-    return false;  // Small memset (first byte with STC/MVI): Use MVC
-  if (Op.isZeroMemset())
-    return false;  // Memset zero: Use XC
+  if (Limit != ~unsigned(0)) {
+    // Don't expand Op into scalar loads/stores in these cases:
+    if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
+      return false; // Small memcpy: Use MVC
+    if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
+      return false; // Small memset (first byte with STC/MVI): Use MVC
+    if (Op.isZeroMemset())
+      return false; // Memset zero: Use XC
+  }
  
    return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
                                                    SrcAS, FuncAttributes);
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp

index db4b487..ce30d8e 100644 (file)
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -88,7 +88,7 @@ static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
  SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
      SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst,
      SDValue Byte, SDValue Size, Align Alignment, bool IsVolatile,
-    MachinePointerInfo DstPtrInfo) const {
+    bool AlwaysInline, MachinePointerInfo DstPtrInfo) const {
    EVT PtrVT = Dst.getValueType();
  
    if (IsVolatile)
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h

index da67257..6ac5bf8 100644 (file)
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -31,7 +31,7 @@ public:
    SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
                                    SDValue Chain, SDValue Dst, SDValue Byte,
                                    SDValue Size, Align Alignment,
-                                  bool IsVolatile,
+                                  bool IsVolatile, bool AlwaysInline,
                                    MachinePointerInfo DstPtrInfo) const override;
  
    std::pair<SDValue, SDValue>
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp

index 16e0515..74af4c8 100644 (file)
--- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
@@ -44,7 +44,7 @@ SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove(
  
  SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset(
      SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Val,
-    SDValue Size, Align Alignment, bool IsVolatile,
+    SDValue Size, Align Alignment, bool IsVolatile, bool AlwaysInline,
      MachinePointerInfo DstPtrInfo) const {
    auto &ST = DAG.getMachineFunction().getSubtarget<WebAssemblySubtarget>();
    if (!ST.hasBulkMemory())
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h

index f4d2132..fd517b2 100644 (file)
--- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
@@ -37,6 +37,7 @@ public:
    SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
                                    SDValue Chain, SDValue Op1, SDValue Op2,
                                    SDValue Op3, Align Alignment, bool IsVolatile,
+                                  bool AlwaysInline,
                                    MachinePointerInfo DstPtrInfo) const override;
  };
  
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp

index e51d05f..78a286a 100644 (file)
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -46,7 +46,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
  
  SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
      SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
-    SDValue Size, Align Alignment, bool isVolatile,
+    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
      MachinePointerInfo DstPtrInfo) const {
    ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
    const X86Subtarget &Subtarget =
@@ -143,7 +143,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
                        DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                    DAG.getConstant(Offset, dl, AddrVT)),
                        Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
-                      isVolatile, false, DstPtrInfo.getWithOffset(Offset));
+                      isVolatile, AlwaysInline,
+                      /*isTailCall=*/false, DstPtrInfo.getWithOffset(Offset));
    }
  
    // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/llvm/lib/Target/X86/X86SelectionDAGInfo.h

index dac6297..19136ca 100644 (file)
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -29,7 +29,7 @@ public:
    SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
                                    SDValue Chain, SDValue Dst, SDValue Src,
                                    SDValue Size, Align Alignment,
-                                  bool isVolatile,
+                                  bool isVolatile, bool AlwaysInline,
                                    MachinePointerInfo DstPtrInfo) const override;
  
    SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
diff --git a/llvm/test/CodeGen/AArch64/memset-inline.ll b/llvm/test/CodeGen/AArch64/memset-inline.ll

new file mode 100644 (file)

index 0000000..66731ac
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/memset-inline.ll
@@ -0,0 +1,296 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=-neon | FileCheck %s --check-prefixes=ALL,GPR
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=neon  | FileCheck %s --check-prefixes=ALL,NEON
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.inline.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+; //// Runtime byte value, destination without an align attribute, sizes 1 to 64 ////
+
+define void @memset_1(i8* %a, i8 %value) nounwind {
+; ALL-LABEL: memset_1:
+; ALL:       // %bb.0:
+; ALL-NEXT:    strb w1, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 1, i1 0)
+  ret void
+}
+
+define void @memset_2(i8* %a, i8 %value) nounwind {
+; ALL-LABEL: memset_2:
+; ALL:       // %bb.0:
+; ALL-NEXT:    bfi w1, w1, #8, #24
+; ALL-NEXT:    strh w1, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 2, i1 0)
+  ret void
+}
+
+define void @memset_4(i8* %a, i8 %value) nounwind {
+; ALL-LABEL: memset_4:
+; ALL:       // %bb.0:
+; ALL-NEXT:    mov w8, #16843009
+; ALL-NEXT:    and w9, w1, #0xff
+; ALL-NEXT:    mul w8, w9, w8
+; ALL-NEXT:    str w8, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 4, i1 0)
+  ret void
+}
+
+define void @memset_8(i8* %a, i8 %value) nounwind {
+; ALL-LABEL: memset_8:
+; ALL:       // %bb.0:
+; ALL-NEXT:    // kill: def $w1 killed $w1 def $x1
+; ALL-NEXT:    mov x8, #72340172838076673
+; ALL-NEXT:    and x9, x1, #0xff
+; ALL-NEXT:    mul x8, x9, x8
+; ALL-NEXT:    str x8, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 8, i1 0)
+  ret void
+}
+
+define void @memset_16(i8* %a, i8 %value) nounwind {
+; ALL-LABEL: memset_16:
+; ALL:       // %bb.0:
+; ALL-NEXT:    // kill: def $w1 killed $w1 def $x1
+; ALL-NEXT:    mov x8, #72340172838076673
+; ALL-NEXT:    and x9, x1, #0xff
+; ALL-NEXT:    mul x8, x9, x8
+; ALL-NEXT:    stp x8, x8, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 16, i1 0)
+  ret void
+}
+
+define void @memset_32(i8* %a, i8 %value) nounwind {
+; GPR-LABEL: memset_32:
+; GPR:       // %bb.0:
+; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GPR-NEXT:    mov x8, #72340172838076673
+; GPR-NEXT:    and x9, x1, #0xff
+; GPR-NEXT:    mul x8, x9, x8
+; GPR-NEXT:    stp x8, x8, [x0, #16]
+; GPR-NEXT:    stp x8, x8, [x0]
+; GPR-NEXT:    ret
+;
+; NEON-LABEL: memset_32:
+; NEON:       // %bb.0:
+; NEON-NEXT:    dup v0.16b, w1
+; NEON-NEXT:    stp q0, q0, [x0]
+; NEON-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 32, i1 0)
+  ret void
+}
+
+define void @memset_64(i8* %a, i8 %value) nounwind {
+; GPR-LABEL: memset_64:
+; GPR:       // %bb.0:
+; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GPR-NEXT:    mov x8, #72340172838076673
+; GPR-NEXT:    and x9, x1, #0xff
+; GPR-NEXT:    mul x8, x9, x8
+; GPR-NEXT:    stp x8, x8, [x0, #48]
+; GPR-NEXT:    stp x8, x8, [x0, #32]
+; GPR-NEXT:    stp x8, x8, [x0, #16]
+; GPR-NEXT:    stp x8, x8, [x0]
+; GPR-NEXT:    ret
+;
+; NEON-LABEL: memset_64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    dup v0.16b, w1
+; NEON-NEXT:    stp q0, q0, [x0]
+; NEON-NEXT:    stp q0, q0, [x0, #32]
+; NEON-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 64, i1 0)
+  ret void
+}
+
+; //// Runtime byte value, destination with align equal to the size, sizes 16 to 64 ////
+
+define void @aligned_memset_16(i8* align 16 %a, i8 %value) nounwind {
+; ALL-LABEL: aligned_memset_16:
+; ALL:       // %bb.0:
+; ALL-NEXT:    // kill: def $w1 killed $w1 def $x1
+; ALL-NEXT:    mov x8, #72340172838076673
+; ALL-NEXT:    and x9, x1, #0xff
+; ALL-NEXT:    mul x8, x9, x8
+; ALL-NEXT:    stp x8, x8, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 16 %a, i8 %value, i64 16, i1 0)
+  ret void
+}
+
+define void @aligned_memset_32(i8* align 32 %a, i8 %value) nounwind {
+; GPR-LABEL: aligned_memset_32:
+; GPR:       // %bb.0:
+; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GPR-NEXT:    mov x8, #72340172838076673
+; GPR-NEXT:    and x9, x1, #0xff
+; GPR-NEXT:    mul x8, x9, x8
+; GPR-NEXT:    stp x8, x8, [x0, #16]
+; GPR-NEXT:    stp x8, x8, [x0]
+; GPR-NEXT:    ret
+;
+; NEON-LABEL: aligned_memset_32:
+; NEON:       // %bb.0:
+; NEON-NEXT:    dup v0.16b, w1
+; NEON-NEXT:    stp q0, q0, [x0]
+; NEON-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 32 %a, i8 %value, i64 32, i1 0)
+  ret void
+}
+
+define void @aligned_memset_64(i8* align 64 %a, i8 %value) nounwind {
+; GPR-LABEL: aligned_memset_64:
+; GPR:       // %bb.0:
+; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GPR-NEXT:    mov x8, #72340172838076673
+; GPR-NEXT:    and x9, x1, #0xff
+; GPR-NEXT:    mul x8, x9, x8
+; GPR-NEXT:    stp x8, x8, [x0, #48]
+; GPR-NEXT:    stp x8, x8, [x0, #32]
+; GPR-NEXT:    stp x8, x8, [x0, #16]
+; GPR-NEXT:    stp x8, x8, [x0]
+; GPR-NEXT:    ret
+;
+; NEON-LABEL: aligned_memset_64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    dup v0.16b, w1
+; NEON-NEXT:    stp q0, q0, [x0]
+; NEON-NEXT:    stp q0, q0, [x0, #32]
+; NEON-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 64 %a, i8 %value, i64 64, i1 0)
+  ret void
+}
+
+; //// Constant zero byte value, destination without an align attribute, sizes 1 to 64 ////
+
+define void @bzero_1(i8* %a) nounwind {
+; ALL-LABEL: bzero_1:
+; ALL:       // %bb.0:
+; ALL-NEXT:    strb wzr, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 1, i1 0)
+  ret void
+}
+
+define void @bzero_2(i8* %a) nounwind {
+; ALL-LABEL: bzero_2:
+; ALL:       // %bb.0:
+; ALL-NEXT:    strh wzr, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 2, i1 0)
+  ret void
+}
+
+define void @bzero_4(i8* %a) nounwind {
+; ALL-LABEL: bzero_4:
+; ALL:       // %bb.0:
+; ALL-NEXT:    str wzr, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 4, i1 0)
+  ret void
+}
+
+define void @bzero_8(i8* %a) nounwind {
+; ALL-LABEL: bzero_8:
+; ALL:       // %bb.0:
+; ALL-NEXT:    str xzr, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 8, i1 0)
+  ret void
+}
+
+define void @bzero_16(i8* %a) nounwind {
+; ALL-LABEL: bzero_16:
+; ALL:       // %bb.0:
+; ALL-NEXT:    stp xzr, xzr, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 16, i1 0)
+  ret void
+}
+
+define void @bzero_32(i8* %a) nounwind {
+; GPR-LABEL: bzero_32:
+; GPR:       // %bb.0:
+; GPR-NEXT:    adrp x8, .LCPI15_0
+; GPR-NEXT:    ldr q0, [x8, :lo12:.LCPI15_0]
+; GPR-NEXT:    stp q0, q0, [x0]
+; GPR-NEXT:    ret
+;
+; NEON-LABEL: bzero_32:
+; NEON:       // %bb.0:
+; NEON-NEXT:    movi v0.2d, #0000000000000000
+; NEON-NEXT:    stp q0, q0, [x0]
+; NEON-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 32, i1 0)
+  ret void
+}
+
+define void @bzero_64(i8* %a) nounwind {
+; GPR-LABEL: bzero_64:
+; GPR:       // %bb.0:
+; GPR-NEXT:    adrp x8, .LCPI16_0
+; GPR-NEXT:    ldr q0, [x8, :lo12:.LCPI16_0]
+; GPR-NEXT:    stp q0, q0, [x0]
+; GPR-NEXT:    stp q0, q0, [x0, #32]
+; GPR-NEXT:    ret
+;
+; NEON-LABEL: bzero_64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    movi v0.2d, #0000000000000000
+; NEON-NEXT:    stp q0, q0, [x0]
+; NEON-NEXT:    stp q0, q0, [x0, #32]
+; NEON-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 64, i1 0)
+  ret void
+}
+
+; //// Constant zero byte value, align given on the call operand only, sizes 16 to 64 ////
+
+define void @aligned_bzero_16(i8* %a) nounwind {
+; ALL-LABEL: aligned_bzero_16:
+; ALL:       // %bb.0:
+; ALL-NEXT:    stp xzr, xzr, [x0]
+; ALL-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 16 %a, i8 0, i64 16, i1 0)
+  ret void
+}
+
+define void @aligned_bzero_32(i8* %a) nounwind {
+; GPR-LABEL: aligned_bzero_32:
+; GPR:       // %bb.0:
+; GPR-NEXT:    adrp x8, .LCPI18_0
+; GPR-NEXT:    ldr q0, [x8, :lo12:.LCPI18_0]
+; GPR-NEXT:    stp q0, q0, [x0]
+; GPR-NEXT:    ret
+;
+; NEON-LABEL: aligned_bzero_32:
+; NEON:       // %bb.0:
+; NEON-NEXT:    movi v0.2d, #0000000000000000
+; NEON-NEXT:    stp q0, q0, [x0]
+; NEON-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 32 %a, i8 0, i64 32, i1 0)
+  ret void
+}
+
+define void @aligned_bzero_64(i8* %a) nounwind {
+; GPR-LABEL: aligned_bzero_64:
+; GPR:       // %bb.0:
+; GPR-NEXT:    adrp x8, .LCPI19_0
+; GPR-NEXT:    ldr q0, [x8, :lo12:.LCPI19_0]
+; GPR-NEXT:    stp q0, q0, [x0]
+; GPR-NEXT:    stp q0, q0, [x0, #32]
+; GPR-NEXT:    ret
+;
+; NEON-LABEL: aligned_bzero_64:
+; NEON:       // %bb.0:
+; NEON-NEXT:    movi v0.2d, #0000000000000000
+; NEON-NEXT:    stp q0, q0, [x0]
+; NEON-NEXT:    stp q0, q0, [x0, #32]
+; NEON-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 64 %a, i8 0, i64 64, i1 0)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll b/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll

new file mode 100644 (file)

index 0000000..47d7dd1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.inline.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+define void @test1(i8* %a, i8 %value) nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    mov x8, #72340172838076673
+; CHECK-NEXT:    and x9, x1, #0xff
+; CHECK-NEXT:    mul x8, x9, x8
+; CHECK-NEXT:    str x8, [x0]
+; CHECK-NEXT:    ret
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 8, i1 0)
+  ret void
+}
+
+define void @regular_memset_calls_external_function(i8* %a, i8 %value) nounwind {
+; CHECK-LABEL: regular_memset_calls_external_function:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w2, #1024
+; CHECK-NEXT:    b memset
+  tail call void @llvm.memset.p0i8.i64(i8* %a, i8 %value, i64 1024, i1 0)
+  ret void
+}
+
+define void @inlined_set_doesnt_call_external_function(i8* %a, i8 %value) nounwind {
+; CHECK-LABEL: inlined_set_doesnt_call_external_function:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dup v0.16b, w1
+; CHECK-NEXT:    stp q0, q0, [x0]
+; CHECK-NEXT:    stp q0, q0, [x0, #32]
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 1024, i1 0)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/memset-inline.ll b/llvm/test/CodeGen/X86/memset-inline.ll

new file mode 100644 (file)

index 0000000..65cfcea
--- /dev/null
+++ b/llvm/test/CodeGen/X86/memset-inline.ll
@@ -0,0 +1,548 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse2,-sse4.2 | FileCheck %s --check-prefixes=GPR,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.2,-avx  | FileCheck %s --check-prefixes=GPR,SSE4
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx,-avx512f | FileCheck %s --check-prefixes=GPR,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f      | FileCheck %s --check-prefixes=GPR,AVX512
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.inline.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+; //// Runtime byte value, destination without an align attribute, sizes 1 to 64 ////
+
+define void @memset_1(i8* %a, i8 %value) nounwind {
+; GPR-LABEL: memset_1:
+; GPR:       # %bb.0:
+; GPR-NEXT:    movb %sil, (%rdi)
+; GPR-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 1, i1 0)
+  ret void
+}
+
+define void @memset_2(i8* %a, i8 %value) nounwind {
+; GPR-LABEL: memset_2:
+; GPR:       # %bb.0:
+; GPR-NEXT:    movzbl %sil, %eax
+; GPR-NEXT:    shll $8, %esi
+; GPR-NEXT:    orl %esi, %eax
+; GPR-NEXT:    movw %ax, (%rdi)
+; GPR-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 2, i1 0)
+  ret void
+}
+
+define void @memset_4(i8* %a, i8 %value) nounwind {
+; GPR-LABEL: memset_4:
+; GPR:       # %bb.0:
+; GPR-NEXT:    movzbl %sil, %eax
+; GPR-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; GPR-NEXT:    movl %eax, (%rdi)
+; GPR-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 4, i1 0)
+  ret void
+}
+
+define void @memset_8(i8* %a, i8 %value) nounwind {
+; GPR-LABEL: memset_8:
+; GPR:       # %bb.0:
+; GPR-NEXT:    # kill: def $esi killed $esi def $rsi
+; GPR-NEXT:    movzbl %sil, %eax
+; GPR-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; GPR-NEXT:    imulq %rax, %rcx
+; GPR-NEXT:    movq %rcx, (%rdi)
+; GPR-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 8, i1 0)
+  ret void
+}
+
+define void @memset_16(i8* %a, i8 %value) nounwind {
+; SSE2-LABEL: memset_16:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
+; SSE2-NEXT:    movzbl %sil, %eax
+; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT:    imulq %rax, %rcx
+; SSE2-NEXT:    movq %rcx, 8(%rdi)
+; SSE2-NEXT:    movq %rcx, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: memset_16:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    movd %esi, %xmm0
+; SSE4-NEXT:    pxor %xmm1, %xmm1
+; SSE4-NEXT:    pshufb %xmm1, %xmm0
+; SSE4-NEXT:    movdqu %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: memset_16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovd %esi, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: memset_16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovd %esi, %xmm0
+; AVX512-NEXT:    vpbroadcastb %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqu %xmm0, (%rdi)
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 16, i1 0)
+  ret void
+}
+
+define void @memset_32(i8* %a, i8 %value) nounwind {
+; SSE2-LABEL: memset_32:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
+; SSE2-NEXT:    movzbl %sil, %eax
+; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT:    imulq %rax, %rcx
+; SSE2-NEXT:    movq %rcx, 24(%rdi)
+; SSE2-NEXT:    movq %rcx, 16(%rdi)
+; SSE2-NEXT:    movq %rcx, 8(%rdi)
+; SSE2-NEXT:    movq %rcx, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: memset_32:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    movd %esi, %xmm0
+; SSE4-NEXT:    pxor %xmm1, %xmm1
+; SSE4-NEXT:    pshufb %xmm1, %xmm0
+; SSE4-NEXT:    movdqu %xmm0, 16(%rdi)
+; SSE4-NEXT:    movdqu %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: memset_32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovd %esi, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovdqu %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: memset_32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovd %esi, %xmm0
+; AVX512-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX512-NEXT:    vmovdqu %ymm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 32, i1 0)
+  ret void
+}
+
+define void @memset_64(i8* %a, i8 %value) nounwind {
+; SSE2-LABEL: memset_64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    # kill: def $esi killed $esi def $rsi
+; SSE2-NEXT:    movzbl %sil, %eax
+; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; SSE2-NEXT:    imulq %rax, %rcx
+; SSE2-NEXT:    movq %rcx, 56(%rdi)
+; SSE2-NEXT:    movq %rcx, 48(%rdi)
+; SSE2-NEXT:    movq %rcx, 40(%rdi)
+; SSE2-NEXT:    movq %rcx, 32(%rdi)
+; SSE2-NEXT:    movq %rcx, 24(%rdi)
+; SSE2-NEXT:    movq %rcx, 16(%rdi)
+; SSE2-NEXT:    movq %rcx, 8(%rdi)
+; SSE2-NEXT:    movq %rcx, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: memset_64:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    movd %esi, %xmm0
+; SSE4-NEXT:    pxor %xmm1, %xmm1
+; SSE4-NEXT:    pshufb %xmm1, %xmm0
+; SSE4-NEXT:    movdqu %xmm0, 48(%rdi)
+; SSE4-NEXT:    movdqu %xmm0, 32(%rdi)
+; SSE4-NEXT:    movdqu %xmm0, 16(%rdi)
+; SSE4-NEXT:    movdqu %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: memset_64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovd %esi, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
+; AVX-NEXT:    vmovups %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: memset_64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movzbl %sil, %eax
+; AVX512-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX512-NEXT:    vpbroadcastd %eax, %zmm0
+; AVX512-NEXT:    vmovdqu64 %zmm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 64, i1 0)
+  ret void
+}
+
+; //// Runtime byte value, destination with align equal to the size, sizes 16 to 64 ////
+
+define void @aligned_memset_16(i8* align 16 %a, i8 %value) nounwind {
+; SSE2-LABEL: aligned_memset_16:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movd %esi, %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE2-NEXT:    movdqa %xmm0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: aligned_memset_16:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    movd %esi, %xmm0
+; SSE4-NEXT:    pxor %xmm1, %xmm1
+; SSE4-NEXT:    pshufb %xmm1, %xmm0
+; SSE4-NEXT:    movdqa %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: aligned_memset_16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovd %esi, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: aligned_memset_16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovd %esi, %xmm0
+; AVX512-NEXT:    vpbroadcastb %xmm0, %xmm0
+; AVX512-NEXT:    vmovdqa %xmm0, (%rdi)
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 16 %a, i8 %value, i64 16, i1 0)
+  ret void
+}
+
+define void @aligned_memset_32(i8* align 32 %a, i8 %value) nounwind {
+; SSE2-LABEL: aligned_memset_32:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movd %esi, %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE2-NEXT:    movdqa %xmm0, 16(%rdi)
+; SSE2-NEXT:    movdqa %xmm0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: aligned_memset_32:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    movd %esi, %xmm0
+; SSE4-NEXT:    pxor %xmm1, %xmm1
+; SSE4-NEXT:    pshufb %xmm1, %xmm0
+; SSE4-NEXT:    movdqa %xmm0, 16(%rdi)
+; SSE4-NEXT:    movdqa %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: aligned_memset_32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovd %esi, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: aligned_memset_32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovd %esi, %xmm0
+; AVX512-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX512-NEXT:    vmovdqa %ymm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 32 %a, i8 %value, i64 32, i1 0)
+  ret void
+}
+
+define void @aligned_memset_64(i8* align 64 %a, i8 %value) nounwind {
+; SSE2-LABEL: aligned_memset_64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movd %esi, %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE2-NEXT:    movdqa %xmm0, 48(%rdi)
+; SSE2-NEXT:    movdqa %xmm0, 32(%rdi)
+; SSE2-NEXT:    movdqa %xmm0, 16(%rdi)
+; SSE2-NEXT:    movdqa %xmm0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: aligned_memset_64:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    movd %esi, %xmm0
+; SSE4-NEXT:    pxor %xmm1, %xmm1
+; SSE4-NEXT:    pshufb %xmm1, %xmm0
+; SSE4-NEXT:    movdqa %xmm0, 48(%rdi)
+; SSE4-NEXT:    movdqa %xmm0, 32(%rdi)
+; SSE4-NEXT:    movdqa %xmm0, 16(%rdi)
+; SSE4-NEXT:    movdqa %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: aligned_memset_64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovd %esi, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX-NEXT:    vmovaps %ymm0, 32(%rdi)
+; AVX-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: aligned_memset_64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movzbl %sil, %eax
+; AVX512-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; AVX512-NEXT:    vpbroadcastd %eax, %zmm0
+; AVX512-NEXT:    vmovdqa64 %zmm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 64 %a, i8 %value, i64 64, i1 0)
+  ret void
+}
+
+; //// Constant zero byte value, destination without an align attribute, sizes 1 to 64 ////
+
+define void @bzero_1(i8* %a) nounwind {
+; GPR-LABEL: bzero_1:
+; GPR:       # %bb.0:
+; GPR-NEXT:    movb $0, (%rdi)
+; GPR-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 1, i1 0)
+  ret void
+}
+
+define void @bzero_2(i8* %a) nounwind {
+; GPR-LABEL: bzero_2:
+; GPR:       # %bb.0:
+; GPR-NEXT:    movw $0, (%rdi)
+; GPR-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 2, i1 0)
+  ret void
+}
+
+define void @bzero_4(i8* %a) nounwind {
+; GPR-LABEL: bzero_4:
+; GPR:       # %bb.0:
+; GPR-NEXT:    movl $0, (%rdi)
+; GPR-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 4, i1 0)
+  ret void
+}
+
+define void @bzero_8(i8* %a) nounwind {
+; GPR-LABEL: bzero_8:
+; GPR:       # %bb.0:
+; GPR-NEXT:    movq $0, (%rdi)
+; GPR-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 8, i1 0)
+  ret void
+}
+
+define void @bzero_16(i8* %a) nounwind {
+; SSE2-LABEL: bzero_16:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movq $0, 8(%rdi)
+; SSE2-NEXT:    movq $0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: bzero_16:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    xorps %xmm0, %xmm0
+; SSE4-NEXT:    movups %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: bzero_16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovups %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: bzero_16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovups %xmm0, (%rdi)
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 16, i1 0)
+  ret void
+}
+
+define void @bzero_32(i8* %a) nounwind {
+; SSE2-LABEL: bzero_32:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movq $0, 24(%rdi)
+; SSE2-NEXT:    movq $0, 16(%rdi)
+; SSE2-NEXT:    movq $0, 8(%rdi)
+; SSE2-NEXT:    movq $0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: bzero_32:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    xorps %xmm0, %xmm0
+; SSE4-NEXT:    movups %xmm0, 16(%rdi)
+; SSE4-NEXT:    movups %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: bzero_32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovups %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: bzero_32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovups %ymm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 32, i1 0)
+  ret void
+}
+
+define void @bzero_64(i8* %a) nounwind {
+; SSE2-LABEL: bzero_64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movq $0, 56(%rdi)
+; SSE2-NEXT:    movq $0, 48(%rdi)
+; SSE2-NEXT:    movq $0, 40(%rdi)
+; SSE2-NEXT:    movq $0, 32(%rdi)
+; SSE2-NEXT:    movq $0, 24(%rdi)
+; SSE2-NEXT:    movq $0, 16(%rdi)
+; SSE2-NEXT:    movq $0, 8(%rdi)
+; SSE2-NEXT:    movq $0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: bzero_64:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    xorps %xmm0, %xmm0
+; SSE4-NEXT:    movups %xmm0, 48(%rdi)
+; SSE4-NEXT:    movups %xmm0, 32(%rdi)
+; SSE4-NEXT:    movups %xmm0, 16(%rdi)
+; SSE4-NEXT:    movups %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: bzero_64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
+; AVX-NEXT:    vmovups %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: bzero_64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovups %zmm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 0, i64 64, i1 0)
+  ret void
+}
+
+; //// Constant zero byte value, align given on the call operand only, sizes 16 to 64 ////
+
+define void @aligned_bzero_16(i8* %a) nounwind {
+; SSE2-LABEL: aligned_bzero_16:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    xorps %xmm0, %xmm0
+; SSE2-NEXT:    movaps %xmm0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: aligned_bzero_16:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    xorps %xmm0, %xmm0
+; SSE4-NEXT:    movaps %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: aligned_bzero_16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovaps %xmm0, (%rdi)
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: aligned_bzero_16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovaps %xmm0, (%rdi)
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 16 %a, i8 0, i64 16, i1 0)
+  ret void
+}
+
+define void @aligned_bzero_32(i8* %a) nounwind {
+; SSE2-LABEL: aligned_bzero_32:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    xorps %xmm0, %xmm0
+; SSE2-NEXT:    movaps %xmm0, 16(%rdi)
+; SSE2-NEXT:    movaps %xmm0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: aligned_bzero_32:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    xorps %xmm0, %xmm0
+; SSE4-NEXT:    movaps %xmm0, 16(%rdi)
+; SSE4-NEXT:    movaps %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: aligned_bzero_32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: aligned_bzero_32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 32 %a, i8 0, i64 32, i1 0)
+  ret void
+}
+
+define void @aligned_bzero_64(i8* %a) nounwind {
+; SSE2-LABEL: aligned_bzero_64:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    xorps %xmm0, %xmm0
+; SSE2-NEXT:    movaps %xmm0, 48(%rdi)
+; SSE2-NEXT:    movaps %xmm0, 32(%rdi)
+; SSE2-NEXT:    movaps %xmm0, 16(%rdi)
+; SSE2-NEXT:    movaps %xmm0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4-LABEL: aligned_bzero_64:
+; SSE4:       # %bb.0:
+; SSE4-NEXT:    xorps %xmm0, %xmm0
+; SSE4-NEXT:    movaps %xmm0, 48(%rdi)
+; SSE4-NEXT:    movaps %xmm0, 32(%rdi)
+; SSE4-NEXT:    movaps %xmm0, 16(%rdi)
+; SSE4-NEXT:    movaps %xmm0, (%rdi)
+; SSE4-NEXT:    retq
+;
+; AVX-LABEL: aligned_bzero_64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovaps %ymm0, 32(%rdi)
+; AVX-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX512-LABEL: aligned_bzero_64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT:    vmovaps %zmm0, (%rdi)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* align 64 %a, i8 0, i64 64, i1 0)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/memset-vs-memset-inline.ll b/llvm/test/CodeGen/X86/memset-vs-memset-inline.ll

new file mode 100644 (file)

index 0000000..659b162
--- /dev/null
+++ b/llvm/test/CodeGen/X86/memset-vs-memset-inline.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; regular memset intrinsic, used below as the baseline
+declare void @llvm.memset.inline.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; inline variant that the other functions in this file exercise
+
+define void @test1(i8* %a, i8 %value) nounwind { ; 8-byte llvm.memset.inline with a runtime fill byte
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; CHECK-NEXT:    imulq %rax, %rcx
+; CHECK-NEXT:    movq %rcx, (%rdi)
+; CHECK-NEXT:    retq
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 8, i1 0) ; the checks above expect a single 8-byte store of the zero-extended byte multiplied by 0x0101010101010101
+  ret void
+}
+
+define void @regular_memset_calls_external_function(i8* %a, i8 %value) nounwind { ; baseline case, a plain llvm.memset of 1024 bytes
+; CHECK-LABEL: regular_memset_calls_external_function:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
+; CHECK-NEXT:    jmp memset@PLT # TAILCALL
+  tail call void @llvm.memset.p0i8.i64(i8* %a, i8 %value, i64 1024, i1 0) ; the checks above expect a tail call to memset@PLT with the length 1024 loaded into %edx
+  ret void
+}
+
+define void @inlined_set_doesnt_call_external_function(i8* %a, i8 %value) nounwind { ; same 1024-byte fill as the baseline, but through llvm.memset.inline
+; CHECK-LABEL: inlined_set_doesnt_call_external_function:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    movzbl %sil, %ecx
+; CHECK-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
+; CHECK-NEXT:    imulq %rcx, %rax
+; CHECK-NEXT:    movq %rax, 1016(%rdi)
+; CHECK-NEXT:    movq %rax, 1008(%rdi)
+  tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 1024, i1 0) ; NOTE(review) - the checks above only pin the byte splat and the first two 8-byte stores (offsets 1016 and 1008); nothing here asserts the remaining stores, the final retq, or the absence of a memset call - presumably truncated on purpose, confirm
+  ret void
+}
diff --git a/llvm/test/Other/lint.ll b/llvm/test/Other/lint.ll

index 799d3fe..fbbb7bf 100644 (file)
--- a/llvm/test/Other/lint.ll
+++ b/llvm/test/Other/lint.ll
@@ -6,6 +6,8 @@ declare fastcc void @bar()
  declare void @llvm.stackrestore(i8*)
  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
  declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.inline.p0i8.i8.i64(i8* nocapture, i8, i64, i1) nounwind
  declare void @has_sret(i8* sret(i8) %p)
  declare void @has_noaliases(i32* noalias %p, i32* %q)
  declare void @one_arg(i32)
@@ -87,6 +89,11 @@ call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* b
  ; CHECK: Unusual: noalias argument aliases another argument
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i1 0)
  
+; CHECK: Write to read-only memory
+call void @llvm.memset.p0i8.i8.i64(i8* bitcast (i32* @CG to i8*), i8 1, i64 1, i1 0)
+; CHECK: Write to read-only memory
+call void @llvm.memset.inline.p0i8.i8.i64(i8* bitcast (i32* @CG to i8*), i8 1, i64 1, i1 0)
+
  ; CHECK: Undefined behavior: Buffer overflow
    %wider = bitcast i8* %buf to i16*
    store i16 0, i16* %wider
diff --git a/llvm/test/Verifier/intrinsic-immarg.ll b/llvm/test/Verifier/intrinsic-immarg.ll

index e1e77a9..5a58f33 100644 (file)
--- a/llvm/test/Verifier/intrinsic-immarg.ll
+++ b/llvm/test/Verifier/intrinsic-immarg.ll
@@ -62,6 +62,23 @@ define void @memset(i8* %dest, i8 %val, i1 %is.volatile) {
    ret void
  }
  
+declare void @llvm.memset.inline.p0i8.i32(i8* nocapture, i8, i32, i1)
+define void @memset_inline_is_volatile(i8* %dest, i8 %value, i1 %is.volatile) { ; negative test, the last (i1) operand is a function argument instead of a constant
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %is.volatile
+  ; CHECK-NEXT: call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 8, i1 %is.volatile)
+  call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 8, i1 %is.volatile)
+  ret void
+}
+
+define void @memset_inline_variable_size(i8* %dest, i8 %value, i32 %size) { ; negative test, the size operand is a function argument instead of a constant
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %size
+  ; CHECK-NEXT: call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 %size, i1 true)
+  call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 %size, i1 true)
+  ret void
+}
+
  
  declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
  define void @objectsize(i8* %ptr, i1 %a, i1 %b, i1 %c) {
diff --git a/llvm/test/Verifier/memset-inline.ll b/llvm/test/Verifier/memset-inline.ll

new file mode 100644 (file)

index 0000000..cdde246
--- /dev/null
+++ b/llvm/test/Verifier/memset-inline.ll
@@ -0,0 +1,9 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+; CHECK: alignment is not a power of two
+
+define void @foo(i8* %P, i8 %value) { ; negative test, the destination carries "align 3", which is not a power of two, so the RUN line expects opt to fail
+  call void @llvm.memset.inline.p0i8.i32(i8* align 3 %P, i8 %value, i32 4, i1 false)
+  ret void
+}
+declare void @llvm.memset.inline.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
author	Guillaume Chatelet <gchatelet@google.com>
	Tue, 7 Jun 2022 09:51:32 +0000 (09:51 +0000)
committer	Guillaume Chatelet <gchatelet@google.com>
	Fri, 10 Jun 2022 13:13:59 +0000 (13:13 +0000)
clang/docs/LanguageExtensions.rst		patch \| blob \| history
clang/include/clang/Basic/Builtins.def		patch \| blob \| history
clang/lib/CodeGen/CGBuilder.h		patch \| blob \| history
clang/lib/CodeGen/CGBuiltin.cpp		patch \| blob \| history
clang/lib/Sema/SemaChecking.cpp		patch \| blob \| history
clang/test/CodeGen/builtins-memset-inline.c	[new file with mode: 0644]	patch \| blob
clang/test/Sema/builtins-memset-inline.cpp	[new file with mode: 0644]	patch \| blob
llvm/docs/LangRef.rst		patch \| blob \| history
llvm/include/llvm/CodeGen/SelectionDAG.h		patch \| blob \| history
llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h		patch \| blob \| history
llvm/include/llvm/CodeGen/TargetLowering.h		patch \| blob \| history
llvm/include/llvm/IR/IRBuilder.h		patch \| blob \| history
llvm/include/llvm/IR/IntrinsicInst.h		patch \| blob \| history
llvm/include/llvm/IR/Intrinsics.td		patch \| blob \| history
llvm/lib/Analysis/Lint.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
llvm/lib/IR/IRBuilder.cpp		patch \| blob \| history
llvm/lib/IR/Verifier.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h		patch \| blob \| history
llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp		patch \| blob \| history
llvm/lib/Target/ARM/ARMSelectionDAGInfo.h		patch \| blob \| history
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp		patch \| blob \| history
llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h		patch \| blob \| history
llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp		patch \| blob \| history
llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h		patch \| blob \| history
llvm/lib/Target/X86/X86SelectionDAGInfo.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86SelectionDAGInfo.h		patch \| blob \| history
llvm/test/CodeGen/AArch64/memset-inline.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/X86/memset-inline.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/X86/memset-vs-memset-inline.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Other/lint.ll		patch \| blob \| history
llvm/test/Verifier/intrinsic-immarg.ll		patch \| blob \| history
llvm/test/Verifier/memset-inline.ll	[new file with mode: 0644]	patch \| blob