--- /dev/null
+//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics
+// with vector operands) with matching calls to functions from a vector
+// library (e.g., libmvec, SVML) according to TargetLibraryInfo.
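+//
+// For example, with -vector-library=SVML the vector intrinsic call
+//     %r = call <4 x double> @llvm.exp.v4f64(<4 x double> %in)
+// is replaced with a call to the matching vector library function:
+//     %r = call <4 x double> @__svml_exp4(<4 x double> %in)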
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ReplaceWithVeclib.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "replace-with-veclib"
+
+STATISTIC(NumCallsReplaced,
+ "Number of calls to intrinsics that have been replaced.");
+
+STATISTIC(NumTLIFuncDeclAdded,
+ "Number of vector library function declarations added.");
+
+STATISTIC(NumFuncUsedAdded,
+ "Number of functions added to `llvm.compiler.used`");
+
+static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
+ Module *M = CI.getModule();
+
+ Function *OldFunc = CI.getCalledFunction();
+
+ // Check if the vector library function is already declared in this
+ // module; if not, insert it.
+ Function *TLIFunc = M->getFunction(TLIName);
+ if (!TLIFunc) {
+ TLIFunc = Function::Create(OldFunc->getFunctionType(),
+ Function::ExternalLinkage, TLIName, *M);
+ TLIFunc->copyAttributesFrom(OldFunc);
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
+ << TLIName << "` of type `" << *(TLIFunc->getType())
+ << "` to module.\n");
+
+ ++NumTLIFuncDeclAdded;
+
+ // Add the freshly created function to llvm.compiler.used, similar to
+ // how it is done in InjectTLIMappings, so that the new declaration is
+ // not removed as unused.
+ appendToCompilerUsed(*M, {TLIFunc});
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
+ << "` to `@llvm.compiler.used`.\n");
+ ++NumFuncUsedAdded;
+ }
+
+ // Replace the call to the vector intrinsic with a call
+ // to the corresponding function from the vector library.
+ IRBuilder<> Builder(&CI);
+ SmallVector<Value *> Args(CI.arg_operands());
+ // Preserve the operand bundles.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CI.getOperandBundlesAsDefs(OpBundles);
+ assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
+ "Expecting function types to be identical");
+ CallInst *Replacement = Builder.CreateCall(TLIFunc, Args, OpBundles);
+ CI.replaceAllUsesWith(Replacement);
+ if (isa<FPMathOperator>(Replacement)) {
+ // Preserve fast math flags for FP math.
+ Replacement->copyFastMathFlags(&CI);
+ }
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
+ << OldFunc->getName() << "` with call to `" << TLIName
+ << "`.\n");
+ ++NumCallsReplaced;
+ return true;
+}
+
+static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
+ CallInst &CI) {
+ if (!CI.getCalledFunction()) {
+ return false;
+ }
+
+ auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
+ if (IntrinsicID == Intrinsic::not_intrinsic) {
+ // Replacement is only performed for calls to intrinsic functions.
+ return false;
+ }
+
+ // Compute the argument types of the corresponding scalar call and check
+ // that all vector operands have identical vector width.
+ unsigned VF = 0;
+ SmallVector<Type *> ScalarTypes;
+ for (auto Arg : enumerate(CI.arg_operands())) {
+ auto *ArgType = Arg.value()->getType();
+ // Some vector intrinsics take scalar operands in specific argument
+ // positions.
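+ // For example, the exponent operand of llvm.powi.v4f64 is a scalar i32.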
+ if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) {
+ ScalarTypes.push_back(ArgType);
+ } else {
+ // All remaining arguments of a vector intrinsic must be vectors.
+ auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
+ if (!VectorArgTy) {
+ // The argument is not a vector; do not perform the replacement.
+ return false;
+ }
+ auto NumElements = VectorArgTy->getElementCount();
+ if (NumElements.isScalable()) {
+ // The current implementation does not support scalable vectors.
+ return false;
+ }
+ if (VF && VF != NumElements.getFixedValue()) {
+ // The vector operands differ in width; do not perform the replacement.
+ return false;
+ }
+ VF = NumElements.getFixedValue();
+ ScalarTypes.push_back(VectorArgTy->getElementType());
+ }
+
+ // Try to reconstruct the name for the scalar version of this
+ // intrinsic using the intrinsic ID and the argument types
+ // converted to scalar above.
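+ // For example, the reconstructed scalar name for llvm.exp.v4f64 is
+ // "llvm.exp.f64".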
+ std::string ScalarName;
+ if (Intrinsic::isOverloaded(IntrinsicID)) {
+ ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes);
+ } else {
+ ScalarName = Intrinsic::getName(IntrinsicID).str();
+ }
+
+ if (!TLI.isFunctionVectorizable(ScalarName)) {
+ // The TargetLibraryInfo does not contain a vectorized version of
+ // the scalar function.
+ return false;
+ }
+
+ // Try to find the mapping for the scalar version of this intrinsic
+ // and the exact vector width of the call operands in the
+ // TargetLibraryInfo.
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
+ << ScalarName << "` and vector width " << VF << ".\n");
+ const std::string TLIName =
+ std::string(TLI.getVectorizedFunction(ScalarName, VF));
+
+ if (!TLIName.empty()) {
+ // Found the correct mapping in the TargetLibraryInfo; replace the call
+ // to the intrinsic with a call to the vector library function.
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
+ << "`.\n");
+ return replaceWithTLIFunction(CI, TLIName);
+ }
+
+ return false;
+}
+
+static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
+ bool Changed = false;
+ SmallVector<CallInst *> ReplacedCalls;
+ for (auto &I : instructions(F)) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ if (replaceWithCallToVeclib(TLI, *CI)) {
+ ReplacedCalls.push_back(CI);
+ Changed = true;
+ }
+ }
+ }
+ // Erase the calls to the intrinsics that have been replaced
+ // with calls to the vector library.
+ for (auto *CI : ReplacedCalls) {
+ CI->eraseFromParent();
+ }
+ return Changed;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// New pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+PreservedAnalyses ReplaceWithVeclib::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto Changed = runImpl(TLI, F);
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<TargetLibraryAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<AAManager>();
+ PA.preserve<LoopAccessAnalysis>();
+ PA.preserve<DemandedBitsAnalysis>();
+ PA.preserve<OptimizationRemarkEmitterAnalysis>();
+ PA.preserve<GlobalsAA>();
+ return PA;
+ }
+
+ // The pass did not replace any calls, hence it preserves all analyses.
+ return PreservedAnalyses::all();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+bool ReplaceWithVeclibLegacy::runOnFunction(Function &F) {
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ return runImpl(TLI, F);
+}
+
+void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<LoopAccessLegacyAnalysis>();
+ AU.addPreserved<DemandedBitsWrapperPass>();
+ AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy pass manager initialization.
+////////////////////////////////////////////////////////////////////////////////
+char ReplaceWithVeclibLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+ "Replace intrinsics with calls to vector library", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+ "Replace intrinsics with calls to vector library", false,
+ false)
+
+FunctionPass *llvm::createReplaceWithVeclibLegacyPass() {
+ return new ReplaceWithVeclibLegacy();
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
+; RUN: opt -vector-library=SVML -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,SVML
+; RUN: opt -vector-library=LIBMVEC-X86 -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86
+; RUN: opt -vector-library=MASSV -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV
+; RUN: opt -vector-library=Accelerate -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <4 x double> @exp_v4(<4 x double> %in) {
+; SVML-LABEL: define {{[^@]+}}@exp_v4
+; SVML-SAME: (<4 x double> [[IN:%.*]]) {
+; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]])
+; SVML-NEXT: ret <4 x double> [[TMP1]]
+;
+; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4
+; LIBMVEC-X86-SAME: (<4 x double> [[IN:%.*]]) {
+; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x double> @_ZGVdN4v_exp(<4 x double> [[IN]])
+; LIBMVEC-X86-NEXT: ret <4 x double> [[TMP1]]
+;
+; MASSV-LABEL: define {{[^@]+}}@exp_v4
+; MASSV-SAME: (<4 x double> [[IN:%.*]]) {
+; MASSV-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.exp.v4f64(<4 x double> [[IN]])
+; MASSV-NEXT: ret <4 x double> [[CALL]]
+;
+; ACCELERATE-LABEL: define {{[^@]+}}@exp_v4
+; ACCELERATE-SAME: (<4 x double> [[IN:%.*]]) {
+; ACCELERATE-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.exp.v4f64(<4 x double> [[IN]])
+; ACCELERATE-NEXT: ret <4 x double> [[CALL]]
+;
+ %call = call <4 x double> @llvm.exp.v4f64(<4 x double> %in)
+ ret <4 x double> %call
+}
+
+declare <4 x double> @llvm.exp.v4f64(<4 x double>) #0
+
+define <4 x float> @exp_f32(<4 x float> %in) {
+; SVML-LABEL: define {{[^@]+}}@exp_f32
+; SVML-SAME: (<4 x float> [[IN:%.*]]) {
+; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]])
+; SVML-NEXT: ret <4 x float> [[TMP1]]
+;
+; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32
+; LIBMVEC-X86-SAME: (<4 x float> [[IN:%.*]]) {
+; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x float> @_ZGVbN4v_expf(<4 x float> [[IN]])
+; LIBMVEC-X86-NEXT: ret <4 x float> [[TMP1]]
+;
+; MASSV-LABEL: define {{[^@]+}}@exp_f32
+; MASSV-SAME: (<4 x float> [[IN:%.*]]) {
+; MASSV-NEXT: [[TMP1:%.*]] = call <4 x float> @__expf4_massv(<4 x float> [[IN]])
+; MASSV-NEXT: ret <4 x float> [[TMP1]]
+;
+; ACCELERATE-LABEL: define {{[^@]+}}@exp_f32
+; ACCELERATE-SAME: (<4 x float> [[IN:%.*]]) {
+; ACCELERATE-NEXT: [[TMP1:%.*]] = call <4 x float> @vexpf(<4 x float> [[IN]])
+; ACCELERATE-NEXT: ret <4 x float> [[TMP1]]
+;
+ %call = call <4 x float> @llvm.exp.v4f32(<4 x float> %in)
+ ret <4 x float> %call
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0
+
+; No replacement should take place for a non-vector intrinsic.
+define double @exp_f64(double %in) {
+; COMMON-LABEL: define {{[^@]+}}@exp_f64
+; COMMON-SAME: (double [[IN:%.*]]) {
+; COMMON-NEXT: [[CALL:%.*]] = call double @llvm.exp.f64(double [[IN]])
+; COMMON-NEXT: ret double [[CALL]]
+;
+ %call = call double @llvm.exp.f64(double %in)
+ ret double %call
+}
+
+declare double @llvm.exp.f64(double) #0
+
+; Check that the pass works with scalar operands on
+; vector intrinsics. No vector library has a substitute for powi.
+define <4 x double> @powi_v4(<4 x double> %in) {
+; COMMON-LABEL: define {{[^@]+}}@powi_v4
+; COMMON-SAME: (<4 x double> [[IN:%.*]]) {
+; COMMON-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.powi.v4f64(<4 x double> [[IN]], i32 3)
+; COMMON-NEXT: ret <4 x double> [[CALL]]
+;
+ %call = call <4 x double> @llvm.powi.v4f64(<4 x double> %in, i32 3)
+ ret <4 x double> %call
+}
+
+declare <4 x double> @llvm.powi.v4f64(<4 x double>, i32) #0
+
+; Replacement should not take place if the vector length
+; does not match exactly.
+define <3 x double> @exp_v3(<3 x double> %in) {
+; COMMON-LABEL: define {{[^@]+}}@exp_v3
+; COMMON-SAME: (<3 x double> [[IN:%.*]]) {
+; COMMON-NEXT: [[CALL:%.*]] = call <3 x double> @llvm.exp.v3f64(<3 x double> [[IN]])
+; COMMON-NEXT: ret <3 x double> [[CALL]]
+;
+ %call = call <3 x double> @llvm.exp.v3f64(<3 x double> %in)
+ ret <3 x double> %call
+}
+
+declare <3 x double> @llvm.exp.v3f64(<3 x double>) #0
+
+attributes #0 = { nounwind readnone }