#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
return true;
}
+/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
+/// pessimistic codegen that has to account for setting errno and can enable
+/// vectorization.
+/// Returns true if \p I was a sqrt libcall that got replaced (the original
+/// instruction is erased in that case); returns false and leaves the IR
+/// untouched otherwise.
+static bool
+foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI) {
+  // Match a call to sqrt mathlib function.
+  auto *Call = dyn_cast<CallInst>(&I);
+  if (!Call)
+    return false;
+
+  Module *M = Call->getModule();
+  LibFunc Func;
+  // Only proceed for a recognized, emittable libcall. Calls marked
+  // 'nobuiltin' fail this check (see the nobuiltin regression test).
+  if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
+    return false;
+
+  if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
+    return false;
+
+  // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created,
+  // and (3) we would not end up lowering to a libcall anyway (which could
+  // change the value of errno), then:
+  // (1) the operand arg must not be less than -0.0.
+  // (2) errno won't be set.
+  // (3) it is safe to convert this to an intrinsic call.
+  // TODO: Check if the arg is known non-negative.
+  Type *Ty = Call->getType();
+  if (TTI.haveFastSqrt(Ty) && Call->hasNoNaNs()) {
+    IRBuilder<> Builder(&I);
+    // Guard restores the builder's FMF on scope exit; the new call inherits
+    // the original call's fast-math flags (e.g. nnan, ninf).
+    IRBuilderBase::FastMathFlagGuard Guard(Builder);
+    Builder.setFastMathFlags(Call->getFastMathFlags());
+
+    Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
+    Value *NewSqrt = Builder.CreateCall(Sqrt, Call->getArgOperand(0), "sqrt");
+    I.replaceAllUsesWith(NewSqrt);
+
+    // Explicitly erase the old call because a call with side effects is not
+    // trivially dead.
+    I.eraseFromParent();
+    return true;
+  }
+
+  return false;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
- TargetTransformInfo &TTI) {
+ TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
if (!DT.isReachableFromEntry(&BB))
continue;
- // Do not delete instructions under here and invalidate the iterator.
+
// Walk the block backwards for efficiency. We're matching a chain of
// use->defs, so we're more likely to succeed by starting from the bottom.
// Also, we want to avoid matching partial patterns.
// TODO: It would be more efficient if we removed dead instructions
// iteratively in this loop rather than waiting until the end.
- for (Instruction &I : llvm::reverse(BB)) {
+ for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) {
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedFunnelShift(I, DT);
MadeChange |= tryToRecognizePopCount(I);
MadeChange |= tryToFPToSat(I, TTI);
+ MadeChange |= foldSqrt(I, TTI, TLI);
}
}
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
- MadeChange |= foldUnusualPatterns(F, DT, TTI);
+ MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI);
return MadeChange;
}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=aggressive-instcombine -mtriple x86_64-- -S | FileCheck %s
+
+declare float @sqrtf(float)
+declare double @sqrt(double)
+declare fp128 @sqrtl(fp128)
+
+; "nnan" implies no setting of errno and the target can lower this to an
+; instruction, so transform to an intrinsic.
+
+define float @sqrt_call_nnan_f32(float %x) {
+; CHECK-LABEL: @sqrt_call_nnan_f32(
+; CHECK-NEXT: [[SQRT1:%.*]] = call nnan float @llvm.sqrt.f32(float [[X:%.*]])
+; CHECK-NEXT: ret float [[SQRT1]]
+;
+  ; nnan libcall on a type with a fast hardware sqrt -> intrinsic.
+  %sqrt = call nnan float @sqrtf(float %x)
+  ret float %sqrt
+}
+
+; Verify that other FMF are propagated to the intrinsic call.
+; We don't care about propagating 'tail' because this is not going to be lowered as a call.
+
+define double @sqrt_call_nnan_f64(double %x) {
+; CHECK-LABEL: @sqrt_call_nnan_f64(
+; CHECK-NEXT: [[SQRT1:%.*]] = call nnan ninf double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT: ret double [[SQRT1]]
+;
+  ; Both nnan and ninf must carry over to the intrinsic call.
+  %sqrt = tail call nnan ninf double @sqrt(double %x)
+  ret double %sqrt
+}
+
+; We don't change this because it will be lowered to a call that could
+; theoretically still change errno and affect other accessors of errno.
+
+define fp128 @sqrt_call_nnan_f128(fp128 %x) {
+; CHECK-LABEL: @sqrt_call_nnan_f128(
+; CHECK-NEXT: [[SQRT:%.*]] = call nnan fp128 @sqrtl(fp128 [[X:%.*]])
+; CHECK-NEXT: ret fp128 [[SQRT]]
+;
+  ; fp128 has no fast sqrt on x86_64, so the libcall must remain untouched.
+  %sqrt = call nnan fp128 @sqrtl(fp128 %x)
+  ret fp128 %sqrt
+}
+
+; Don't alter a no-builtin libcall.
+
+define float @sqrt_call_nnan_f32_nobuiltin(float %x) {
+; CHECK-LABEL: @sqrt_call_nnan_f32_nobuiltin(
+; CHECK-NEXT: [[SQRT:%.*]] = call nnan float @sqrtf(float [[X:%.*]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: ret float [[SQRT]]
+;
+  ; 'nobuiltin' means this is not the libm sqrtf; must not be transformed.
+  %sqrt = call nnan float @sqrtf(float %x) nobuiltin
+  ret float %sqrt
+}