[X86] Add feature for Fast Short REP MOV (FSRM) for Icelake or newer.
author: Hiroshi Yamauchi <yamauchi@google.com>
Fri, 14 Aug 2020 19:20:08 +0000 (12:20 -0700)
committer: Hiroshi Yamauchi <yamauchi@google.com>
Wed, 19 Aug 2020 20:39:42 +0000 (13:39 -0700)
Differential Revision: https://reviews.llvm.org/D85989

llvm/lib/Target/X86/X86.td
llvm/lib/Target/X86/X86InstrInfo.td
llvm/lib/Target/X86/X86Subtarget.h

index 6debd3a..bb52bd6 100644 (file)
@@ -372,6 +372,12 @@ def FeatureERMSB
           "ermsb", "HasERMSB", "true",
           "REP MOVS/STOS are fast">;
 
+// Icelake and newer processors have Fast Short REP MOV.
+def FeatureFSRM
+    : SubtargetFeature<
+          "fsrm", "HasFSRM", "true",
+          "REP MOVSB of short lengths is faster">;
+
 // Bulldozer and newer processors can merge CMP/TEST (but not other
 // instructions) with conditional branches.
 def FeatureBranchFusion
@@ -713,7 +719,8 @@ def ProcessorFeatures {
                                                   FeatureVPOPCNTDQ,
                                                   FeatureGFNI,
                                                   FeatureCLWB,
-                                                  FeatureRDPID];
+                                                  FeatureRDPID,
+                                                  FeatureFSRM];
   list<SubtargetFeature> ICLTuning = CNLTuning;
   list<SubtargetFeature> ICLFeatures =
     !listconcat(CNLFeatures, ICLAdditionalFeatures);
index 3ea0ae8..8d21780 100644 (file)
@@ -1016,6 +1016,7 @@ def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
 def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
 def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
 def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
+def HasFSRM : Predicate<"Subtarget->hasFSRM()">;
 def HasMFence    : Predicate<"Subtarget->hasMFence()">;
 def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
 def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
index 7b48e27..923f810 100644 (file)
@@ -302,6 +302,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   /// True if the processor has enhanced REP MOVSB/STOSB.
   bool HasERMSB = false;
 
+  /// True if the processor has fast short REP MOV.
+  bool HasFSRM = false;
+
   /// True if the short functions should be padded to prevent
   /// a stall when returning too early.
   bool PadShortFunctions = false;
@@ -694,6 +697,7 @@ public:
   bool hasMacroFusion() const { return HasMacroFusion; }
   bool hasBranchFusion() const { return HasBranchFusion; }
   bool hasERMSB() const { return HasERMSB; }
+  bool hasFSRM() const { return HasFSRM; }
   bool hasSlowDivide32() const { return HasSlowDivide32; }
   bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }