--- /dev/null
+//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics
+// with vector operands) with matching calls to functions from a vector
+// library (e.g., libmvec, SVML) according to TargetLibraryInfo.
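+//
+// For example, with -vector-library=SVML the vector intrinsic call
+//     %r = call <4 x double> @llvm.exp.v4f64(<4 x double> %in)
+// is replaced with a call to the matching vector library function:
+//     %r = call <4 x double> @__svml_exp4(<4 x double> %in)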
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ReplaceWithVeclib.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "replace-with-veclib"
+
+STATISTIC(NumCallsReplaced,
+ "Number of calls to intrinsics that have been replaced.");
+
+STATISTIC(NumTLIFuncDeclAdded,
+ "Number of vector library function declarations added.");
+
+STATISTIC(NumFuncUsedAdded,
+ "Number of functions added to `llvm.compiler.used`");
+
+static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
+ Module *M = CI.getModule();
+
+ Function *OldFunc = CI.getCalledFunction();
+
+ // Check if the vector library function is already declared in this
+ // module; if not, insert it.
+ Function *TLIFunc = M->getFunction(TLIName);
+ if (!TLIFunc) {
+ TLIFunc = Function::Create(OldFunc->getFunctionType(),
+ Function::ExternalLinkage, TLIName, *M);
+ TLIFunc->copyAttributesFrom(OldFunc);
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
+ << TLIName << "` of type `" << *(TLIFunc->getType())
+ << "` to module.\n");
+
+ ++NumTLIFuncDeclAdded;
+
+ // Add the freshly created function to llvm.compiler.used, similar to
+ // how it is done in InjectTLIMappings, so that the new declaration is
+ // not removed as unused.
+ appendToCompilerUsed(*M, {TLIFunc});
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
+ << "` to `@llvm.compiler.used`.\n");
+ ++NumFuncUsedAdded;
+ }
+
+ // Replace the call to the vector intrinsic with a call
+ // to the corresponding function from the vector library.
+ IRBuilder<> Builder(&CI);
+ SmallVector<Value *> Args(CI.arg_operands());
+ // Preserve the operand bundles.
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CI.getOperandBundlesAsDefs(OpBundles);
+ assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
+ "Expecting function types to be identical");
+ CallInst *Replacement = Builder.CreateCall(TLIFunc, Args, OpBundles);
+ CI.replaceAllUsesWith(Replacement);
+ if (isa<FPMathOperator>(Replacement)) {
+ // Preserve fast math flags for FP math.
+ Replacement->copyFastMathFlags(&CI);
+ }
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
+ << OldFunc->getName() << "` with call to `" << TLIName
+ << "`.\n");
+ ++NumCallsReplaced;
+ return true;
+}
+
+static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
+ CallInst &CI) {
+ if (!CI.getCalledFunction()) {
+ return false;
+ }
+
+ auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
+ if (IntrinsicID == Intrinsic::not_intrinsic) {
+ // Replacement is only performed for calls to intrinsic functions.
+ return false;
+ }
+
+ // Compute the argument types of the corresponding scalar call and check
+ // that all vector operands have identical vector width.
+ unsigned VF = 0;
+ SmallVector<Type *> ScalarTypes;
+ for (auto Arg : enumerate(CI.arg_operands())) {
+ auto *ArgType = Arg.value()->getType();
+ // Some vector intrinsics take scalar operands in specific argument
+ // positions.
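+ // For example, the exponent operand of llvm.powi.v4f64 is a scalar i32.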
+ if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) {
+ ScalarTypes.push_back(ArgType);
+ } else {
+ // All remaining arguments of a vector intrinsic must be vectors.
+ auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
+ if (!VectorArgTy) {
+ // The argument is not a vector; do not perform the replacement.
+ return false;
+ }
+ auto NumElements = VectorArgTy->getElementCount();
+ if (NumElements.isScalable()) {
+ // The current implementation does not support scalable vectors.
+ return false;
+ }
+ if (VF && VF != NumElements.getFixedValue()) {
+ // The vector operands differ in width; do not perform the replacement.
+ return false;
+ }
+ VF = NumElements.getFixedValue();
+ ScalarTypes.push_back(VectorArgTy->getElementType());
+ }
+
+ // Try to reconstruct the name for the scalar version of this
+ // intrinsic using the intrinsic ID and the argument types
+ // converted to scalar above.
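+ // For example, the reconstructed scalar name for llvm.exp.v4f64 is
+ // "llvm.exp.f64".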
+ std::string ScalarName;
+ if (Intrinsic::isOverloaded(IntrinsicID)) {
+ ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes);
+ } else {
+ ScalarName = Intrinsic::getName(IntrinsicID).str();
+ }
+
+ if (!TLI.isFunctionVectorizable(ScalarName)) {
+ // The TargetLibraryInfo does not contain a vectorized version of
+ // the scalar function.
+ return false;
+ }
+
+ // Try to find the mapping for the scalar version of this intrinsic
+ // and the exact vector width of the call operands in the
+ // TargetLibraryInfo.
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
+ << ScalarName << "` and vector width " << VF << ".\n");
+ const std::string TLIName =
+ std::string(TLI.getVectorizedFunction(ScalarName, VF));
+
+ if (!TLIName.empty()) {
+ // Found the correct mapping in the TargetLibraryInfo; replace the call
+ // to the intrinsic with a call to the vector library function.
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
+ << "`.\n");
+ return replaceWithTLIFunction(CI, TLIName);
+ }
+
+ return false;
+}
+
+static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
+ bool Changed = false;
+ SmallVector<CallInst *> ReplacedCalls;
+ for (auto &I : instructions(F)) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ if (replaceWithCallToVeclib(TLI, *CI)) {
+ ReplacedCalls.push_back(CI);
+ Changed = true;
+ }
+ }
+ }
+ // Erase the calls to the intrinsics that have been replaced
+ // with calls to the vector library.
+ for (auto *CI : ReplacedCalls) {
+ CI->eraseFromParent();
+ }
+ return Changed;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// New pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+PreservedAnalyses ReplaceWithVeclib::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto Changed = runImpl(TLI, F);
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<TargetLibraryAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<AAManager>();
+ PA.preserve<LoopAccessAnalysis>();
+ PA.preserve<DemandedBitsAnalysis>();
+ PA.preserve<OptimizationRemarkEmitterAnalysis>();
+ PA.preserve<GlobalsAA>();
+ return PA;
+ }
+
+ // The pass did not replace any calls, hence it preserves all analyses.
+ return PreservedAnalyses::all();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+bool ReplaceWithVeclibLegacy::runOnFunction(Function &F) {
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ return runImpl(TLI, F);
+}
+
+void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<LoopAccessLegacyAnalysis>();
+ AU.addPreserved<DemandedBitsWrapperPass>();
+ AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy pass manager initialization.
+////////////////////////////////////////////////////////////////////////////////
+char ReplaceWithVeclibLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+ "Replace intrinsics with calls to vector library", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+ "Replace intrinsics with calls to vector library", false,
+ false)
+
+FunctionPass *llvm::createReplaceWithVeclibLegacyPass() {
+ return new ReplaceWithVeclibLegacy();
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
+; RUN: opt -vector-library=SVML -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,SVML
+; RUN: opt -vector-library=LIBMVEC-X86 -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86
+; RUN: opt -vector-library=MASSV -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV
+; RUN: opt -vector-library=Accelerate -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <4 x double> @exp_v4(<4 x double> %in) {
+; SVML-LABEL: define {{[^@]+}}@exp_v4
+; SVML-SAME: (<4 x double> [[IN:%.*]]) {
+; SVML-NEXT: [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]])
+; SVML-NEXT: ret <4 x double> [[TMP1]]
+;
+; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4
+; LIBMVEC-X86-SAME: (<4 x double> [[IN:%.*]]) {
+; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x double> @_ZGVdN4v_exp(<4 x double> [[IN]])
+; LIBMVEC-X86-NEXT: ret <4 x double> [[TMP1]]
+;
+; MASSV-LABEL: define {{[^@]+}}@exp_v4
+; MASSV-SAME: (<4 x double> [[IN:%.*]]) {
+; MASSV-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.exp.v4f64(<4 x double> [[IN]])
+; MASSV-NEXT: ret <4 x double> [[CALL]]
+;
+; ACCELERATE-LABEL: define {{[^@]+}}@exp_v4
+; ACCELERATE-SAME: (<4 x double> [[IN:%.*]]) {
+; ACCELERATE-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.exp.v4f64(<4 x double> [[IN]])
+; ACCELERATE-NEXT: ret <4 x double> [[CALL]]
+;
+ %call = call <4 x double> @llvm.exp.v4f64(<4 x double> %in)
+ ret <4 x double> %call
+}
+
+declare <4 x double> @llvm.exp.v4f64(<4 x double>) #0
+
+define <4 x float> @exp_f32(<4 x float> %in) {
+; SVML-LABEL: define {{[^@]+}}@exp_f32
+; SVML-SAME: (<4 x float> [[IN:%.*]]) {
+; SVML-NEXT: [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]])
+; SVML-NEXT: ret <4 x float> [[TMP1]]
+;
+; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32
+; LIBMVEC-X86-SAME: (<4 x float> [[IN:%.*]]) {
+; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x float> @_ZGVbN4v_expf(<4 x float> [[IN]])
+; LIBMVEC-X86-NEXT: ret <4 x float> [[TMP1]]
+;
+; MASSV-LABEL: define {{[^@]+}}@exp_f32
+; MASSV-SAME: (<4 x float> [[IN:%.*]]) {
+; MASSV-NEXT: [[TMP1:%.*]] = call <4 x float> @__expf4_massv(<4 x float> [[IN]])
+; MASSV-NEXT: ret <4 x float> [[TMP1]]
+;
+; ACCELERATE-LABEL: define {{[^@]+}}@exp_f32
+; ACCELERATE-SAME: (<4 x float> [[IN:%.*]]) {
+; ACCELERATE-NEXT: [[TMP1:%.*]] = call <4 x float> @vexpf(<4 x float> [[IN]])
+; ACCELERATE-NEXT: ret <4 x float> [[TMP1]]
+;
+ %call = call <4 x float> @llvm.exp.v4f32(<4 x float> %in)
+ ret <4 x float> %call
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0
+
+; No replacement should take place for a non-vector intrinsic.
+define double @exp_f64(double %in) {
+; COMMON-LABEL: define {{[^@]+}}@exp_f64
+; COMMON-SAME: (double [[IN:%.*]]) {
+; COMMON-NEXT: [[CALL:%.*]] = call double @llvm.exp.f64(double [[IN]])
+; COMMON-NEXT: ret double [[CALL]]
+;
+ %call = call double @llvm.exp.f64(double %in)
+ ret double %call
+}
+
+declare double @llvm.exp.f64(double) #0
+
+; Check that the pass works with scalar operands on
+; vector intrinsics. No vector library has a substitute for powi.
+define <4 x double> @powi_v4(<4 x double> %in) {
+; COMMON-LABEL: define {{[^@]+}}@powi_v4
+; COMMON-SAME: (<4 x double> [[IN:%.*]]) {
+; COMMON-NEXT: [[CALL:%.*]] = call <4 x double> @llvm.powi.v4f64(<4 x double> [[IN]], i32 3)
+; COMMON-NEXT: ret <4 x double> [[CALL]]
+;
+ %call = call <4 x double> @llvm.powi.v4f64(<4 x double> %in, i32 3)
+ ret <4 x double> %call
+}
+
+declare <4 x double> @llvm.powi.v4f64(<4 x double>, i32) #0
+
+; Replacement should not take place if the vector length
+; does not match exactly.
+define <3 x double> @exp_v3(<3 x double> %in) {
+; COMMON-LABEL: define {{[^@]+}}@exp_v3
+; COMMON-SAME: (<3 x double> [[IN:%.*]]) {
+; COMMON-NEXT: [[CALL:%.*]] = call <3 x double> @llvm.exp.v3f64(<3 x double> [[IN]])
+; COMMON-NEXT: ret <3 x double> [[CALL]]
+;
+ %call = call <3 x double> @llvm.exp.v3f64(<3 x double> %in)
+ ret <3 x double> %call
+}
+
+declare <3 x double> @llvm.exp.v3f64(<3 x double>) #0
+
+attributes #0 = { nounwind readnone }