[llvm/CodeGen] Enable the ExpandLargeDivRem pass for X86, Arm and AArch64

author Matthias Gehre <matthias.gehre@xilinx.com>

Tue, 19 Jul 2022 10:28:54 +0000 (11:28 +0100)

committer Matthias Gehre <matthias.gehre@xilinx.com>

Tue, 6 Sep 2022 14:32:04 +0000 (15:32 +0100)
author Matthias Gehre <matthias.gehre@xilinx.com>
Tue, 19 Jul 2022 10:28:54 +0000 (11:28 +0100)
committer Matthias Gehre <matthias.gehre@xilinx.com>
Tue, 6 Sep 2022 14:32:04 +0000 (15:32 +0100)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h

index 525b5db10e35cdd61ef5b45e7150bc2c27f6d02b..2b3c5a0a469fb7a6679276fe37d53019253db923 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -687,6 +687,9 @@ public:
    /// would typically be allowed using throughput or size cost models.
    bool hasDivRemOp(Type *DataType, bool IsSigned) const;
  
+  /// Returns the maximum bitwidth of legal div and rem instructions.
+  unsigned maxLegalDivRemBitWidth() const;
+
    /// Return true if the given instruction (assumed to be a memory access
    /// instruction) has a volatile variant. If that's the case then we can avoid
    /// addrspacecast to generic AS for volatile loads/stores. Default
@@ -1641,6 +1644,7 @@ public:
                                 const SmallBitVector &OpcodeMask) const = 0;
    virtual bool enableOrderedReductions() = 0;
    virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
+  virtual unsigned maxLegalDivRemBitWidth() = 0;
    virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
    virtual bool prefersVectorizedAddressing() = 0;
    virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
@@ -2088,6 +2092,9 @@ public:
    bool hasDivRemOp(Type *DataType, bool IsSigned) override {
      return Impl.hasDivRemOp(DataType, IsSigned);
    }
+  unsigned maxLegalDivRemBitWidth() override {
+    return Impl.maxLegalDivRemBitWidth();
+  }
    bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
      return Impl.hasVolatileVariant(I, AddrSpace);
    }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

index 8342a82197ea8e1a2b9272784733fb7085719840..487a439264433f57c426b2298cf3a823c63c3139 100644 (file)
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -291,6 +291,10 @@ public:
  
    bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
  
+  unsigned maxLegalDivRemBitWidth() const {
+    return llvm::IntegerType::MAX_INT_BITS;
+  }
+
    bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
      return false;
    }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp

index d009f2fc0bdd0e86d8430a77513582119c300b05..46b39669daec7dde93efd8c2c1b9413aa850af23 100644 (file)
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -451,6 +451,10 @@ bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
    return TTIImpl->hasDivRemOp(DataType, IsSigned);
  }
  
+unsigned TargetTransformInfo::maxLegalDivRemBitWidth() const {
+  return TTIImpl->maxLegalDivRemBitWidth();
+}
+
  bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                               unsigned AddrSpace) const {
    return TTIImpl->hasVolatileVariant(I, AddrSpace);
diff --git a/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp

index fa1288a287d3db96323fabfde250e13edb06312d..1fa2993fd5f9080e391741951122831fddec58e1 100644 (file)
--- a/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
+++ b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
@@ -18,6 +18,7 @@
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/CodeGen/Passes.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/InstIterator.h"
@@ -30,14 +31,37 @@
  using namespace llvm;
  
  static cl::opt<unsigned>
-    ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(128),
+    ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
+                     cl::init(llvm::IntegerType::MAX_INT_BITS),
                       cl::desc("div and rem instructions on integers with "
                                "more than <N> bits are expanded."));
  
-static bool runImpl(Function &F) {
+static bool isConstantPowerOfTwo(llvm::Value *V, bool SignedOp) {
+  auto *C = dyn_cast<ConstantInt>(V);
+  if (!C)
+    return false;
+
+  APInt Val = C->getValue();
+  if (SignedOp && Val.isNegative())
+    Val = -Val;
+  return Val.isPowerOf2();
+}
+
+static bool isSigned(unsigned int Opcode) {
+  return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
+}
+
+static bool runImpl(Function &F, const TargetTransformInfo &TTI) {
    SmallVector<BinaryOperator *, 4> Replace;
    bool Modified = false;
  
+  unsigned MaxLegalDivRemBitWidth = TTI.maxLegalDivRemBitWidth();
+  if (ExpandDivRemBits != llvm::IntegerType::MAX_INT_BITS)
+    MaxLegalDivRemBitWidth = ExpandDivRemBits;
+
+  if (MaxLegalDivRemBitWidth >= llvm::IntegerType::MAX_INT_BITS)
+    return false;
+
    for (auto &I : instructions(F)) {
      switch (I.getOpcode()) {
      case Instruction::UDiv:
@@ -46,7 +70,11 @@ static bool runImpl(Function &F) {
      case Instruction::SRem: {
        // TODO: This doesn't handle vectors.
        auto *IntTy = dyn_cast<IntegerType>(I.getType());
-      if (!IntTy || IntTy->getIntegerBitWidth() <= ExpandDivRemBits)
+      if (!IntTy || IntTy->getIntegerBitWidth() <= MaxLegalDivRemBitWidth)
+        continue;
+
+      // The backend has peephole optimizations for powers of two.
+      if (isConstantPowerOfTwo(I.getOperand(1), isSigned(I.getOpcode())))
          continue;
  
        Replace.push_back(&cast<BinaryOperator>(I));
@@ -77,7 +105,8 @@ static bool runImpl(Function &F) {
  
  PreservedAnalyses ExpandLargeDivRemPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
-  bool Changed = runImpl(F);
+  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+  bool Changed = runImpl(F, TTI);
  
    if (Changed)
      return PreservedAnalyses::none();
@@ -93,9 +122,13 @@ public:
      initializeExpandLargeDivRemLegacyPassPass(*PassRegistry::getPassRegistry());
    }
  
-  bool runOnFunction(Function &F) override { return runImpl(F); }
+  bool runOnFunction(Function &F) override {
+    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    return runImpl(F, TTI);
+  }
  
    void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetTransformInfoWrapperPass>();
      AU.addPreserved<AAResultsWrapperPass>();
      AU.addPreserved<GlobalsAAWrapperPass>();
    }
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp

index ab8a872699ed2980decc54748cbe238cdec720df..09cc3143a74e858e84e3c6dce089c7231970ba0f 100644 (file)
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1113,6 +1113,7 @@ bool TargetPassConfig::addISelPasses() {
  
    addPass(createPreISelIntrinsicLoweringPass());
    PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+  addPass(createExpandLargeDivRemPass());
    addIRPasses();
    addCodeGenPrepare();
    addPassesToHandleExceptions();
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

index 955e90e19ffa42172aa8370af11b097ac1017824..bf917b5b9d84b99b4821c02a0c61f9f9175e9efd 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -319,6 +319,8 @@ public:
  
    bool enableOrderedReductions() const { return true; }
  
+  unsigned maxLegalDivRemBitWidth() const { return 128; }
+
    InstructionCost getInterleavedMemoryOpCost(
        unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
        Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h

index c0180923c240a7f6dbbcfd62cbf34f1accaa31e0..3466be0001a8f61509d9a603908f2a41d8bb9d5f 100644 (file)
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -207,6 +207,8 @@ public:
      return isLegalMaskedGather(Ty, Alignment);
    }
  
+  unsigned maxLegalDivRemBitWidth() const { return 64; }
+
    InstructionCost getMemcpyCost(const Instruction *I);
  
    int getNumMemOps(const IntrinsicInst *I) const;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp

index 538e724ed28c6ba9f534e6e6cef97ec3ad6b625a..2aaac9926c80acee4e5b5ab8b30806e5c16dc853 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -5734,6 +5734,10 @@ bool X86TTIImpl::isExpensiveToSpeculativelyExecute(const Instruction* I) {
    return BaseT::isExpensiveToSpeculativelyExecute(I);
  }
  
+unsigned X86TTIImpl::maxLegalDivRemBitWidth() const {
+  return ST->is64Bit() ? 128 : 64;
+}
+
  bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
    return false;
  }
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h

index f74433beb02af66c6ca8177198b5f6da80526e44..18db9999a467a5bd3596ffe34b15d60cdc6bbb25 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -255,6 +255,7 @@ public:
                         const SmallBitVector &OpcodeMask) const;
    bool hasDivRemOp(Type *DataType, bool IsSigned);
    bool isExpensiveToSpeculativelyExecute(const Instruction *I);
+  unsigned maxLegalDivRemBitWidth() const;
    bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
    bool areInlineCompatible(const Function *Caller,
                             const Function *Callee) const;
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll

index a3e0943c701496c6054634735a23eced37a9063d..3c42d1adecd31245fa599a5eb9fe268a4ba80be2 100644 (file)
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -15,6 +15,7 @@
  ; CHECK-NEXT:   ModulePass Manager
  ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
  ; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       Expand large div/rem
  ; CHECK-NEXT:       Expand Atomic instructions
  ; CHECK-NEXT:       Module Verifier
  ; CHECK-NEXT:       Lower Garbage Collection Instructions
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll

index 70ffa2640475d29a2a97103b7d3574c5f1e92ee5..5c016b7c9b38b65e62bd95e187a6203084e5ac84 100644 (file)
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -18,6 +18,7 @@
  ; CHECK-NEXT:   ModulePass Manager
  ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
  ; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       Expand large div/rem
  ; CHECK-NEXT:       Expand Atomic instructions
  ; CHECK-NEXT:     SVE intrinsics optimizations
  ; CHECK-NEXT:       FunctionPass Manager
diff --git a/llvm/test/CodeGen/AArch64/udivmodei5.ll b/llvm/test/CodeGen/AArch64/udivmodei5.ll

new file mode 100644 (file)

index 0000000..6157377
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/udivmodei5.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=aarch64-linux-gnuabi < %s | FileCheck %s
+
+define i65 @udiv65(i65 %a, i65 %b) nounwind {
+; CHECK-LABEL: udiv65:
+; CHECK-NOT:     call
+  %res = udiv i65 %a, %b
+  ret i65 %res
+}
+
+define i129 @udiv129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: udiv129:
+; CHECK-NOT:     call
+  %res = udiv i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @urem129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: urem129:
+; CHECK-NOT:     call
+  %res = urem i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @sdiv129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: sdiv129:
+; CHECK-NOT:     call
+  %res = sdiv i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @srem129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: srem129:
+; CHECK-NOT:     call
+  %res = srem i129 %a, %b
+  ret i129 %res
+}
+
+; Some higher sizes
+define i257 @sdiv257(i257 %a, i257 %b) nounwind {
+; CHECK-LABEL: sdiv257:
+; CHECK-NOT:     call
+  %res = sdiv i257 %a, %b
+  ret i257 %res
+}
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll

index 098422a4a770d597f24295a8bea2f4b2eacfe42d..f1a12254866c930030a3e66895d0f1100a8bc075 100644 (file)
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -5,6 +5,7 @@
  ; CHECK:       ModulePass Manager
  ; CHECK-NEXT:    Pre-ISel Intrinsic Lowering
  ; CHECK-NEXT:    FunctionPass Manager
+; CHECK-NEXT:      Expand large div/rem
  ; CHECK-NEXT:      Expand Atomic instructions
  ; CHECK-NEXT:      Simplify the CFG
  ; CHECK-NEXT:      Dominator Tree Construction
diff --git a/llvm/test/CodeGen/ARM/udivmodei5.ll b/llvm/test/CodeGen/ARM/udivmodei5.ll

new file mode 100644 (file)

index 0000000..4f337f3
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/udivmodei5.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=arm-eabi < %s | FileCheck %s
+
+define i65 @udiv65(i65 %a, i65 %b) nounwind {
+; CHECK-LABEL: udiv65:
+; CHECK-NOT:     call
+  %res = udiv i65 %a, %b
+  ret i65 %res
+}
+
+define i129 @udiv129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: udiv129:
+; CHECK-NOT:     call
+  %res = udiv i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @urem129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: urem129:
+; CHECK-NOT:     call
+  %res = urem i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @sdiv129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: sdiv129:
+; CHECK-NOT:     call
+  %res = sdiv i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @srem129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: srem129:
+; CHECK-NOT:     call
+  %res = srem i129 %a, %b
+  ret i129 %res
+}
+
+; Some higher sizes
+define i257 @sdiv257(i257 %a, i257 %b) nounwind {
+; CHECK-LABEL: sdiv257:
+; CHECK-NOT:     call
+  %res = sdiv i257 %a, %b
+  ret i257 %res
+}
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll

index 1c80d677f78f655bdbecc2aa05e2296b39b269df..d762a5212fd2c35b82ddddaae0b6a43736d41be6 100644 (file)
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -17,6 +17,7 @@
  ; CHECK-NEXT:   ModulePass Manager
  ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
  ; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       Expand large div/rem
  ; CHECK-NEXT:       Expand Atomic instructions
  ; CHECK-NEXT:       Lower AMX intrinsics
  ; CHECK-NEXT:       Lower AMX type for load/store
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll

index 1b73acbcb682821a7404d124aa7e4c03221b23c9..914a2f10323986e8124a971865743d2b20a9cf3b 100644 (file)
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -171,101 +171,8 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
  
  define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
  ; X86-LABEL: scalar_i128:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $40, %esp
-; X86-NEXT:    movl 44(%ebp), %edi
-; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pushl 40(%ebp)
-; X86-NEXT:    pushl 36(%ebp)
-; X86-NEXT:    pushl 32(%ebp)
-; X86-NEXT:    pushl 28(%ebp)
-; X86-NEXT:    pushl 24(%ebp)
-; X86-NEXT:    pushl 20(%ebp)
-; X86-NEXT:    pushl 16(%ebp)
-; X86-NEXT:    pushl 12(%ebp)
-; X86-NEXT:    pushl %eax
-; X86-NEXT:    calll __divti3
-; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %edi, %edx
-; X86-NEXT:    movl %ecx, 12(%edi)
-; X86-NEXT:    movl %esi, 8(%edi)
-; X86-NEXT:    movl %eax, 4(%edi)
-; X86-NEXT:    movl %eax, %edi
-; X86-NEXT:    movl %ebx, (%edx)
-; X86-NEXT:    movl 28(%ebp), %eax
-; X86-NEXT:    imull %eax, %ecx
-; X86-NEXT:    mull %esi
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    addl %ecx, %edx
-; X86-NEXT:    imull 32(%ebp), %esi
-; X86-NEXT:    addl %edx, %esi
-; X86-NEXT:    movl 36(%ebp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    imull %edi, %ecx
-; X86-NEXT:    mull %ebx
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    addl %ecx, %edx
-; X86-NEXT:    movl 40(%ebp), %eax
-; X86-NEXT:    imull %ebx, %eax
-; X86-NEXT:    addl %edx, %eax
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT:    adcl %esi, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %ebx, %eax
-; X86-NEXT:    movl 28(%ebp), %ecx
-; X86-NEXT:    mull %ecx
-; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edi, %eax
-; X86-NEXT:    mull %ecx
-; X86-NEXT:    movl %edx, %ecx
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    addl (%esp), %esi # 4-byte Folded Reload
-; X86-NEXT:    adcl $0, %ecx
-; X86-NEXT:    movl %ebx, %eax
-; X86-NEXT:    mull 32(%ebp)
-; X86-NEXT:    movl %edx, %ebx
-; X86-NEXT:    addl %esi, %eax
-; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT:    adcl %ecx, %ebx
-; X86-NEXT:    setb %cl
-; X86-NEXT:    movl %edi, %eax
-; X86-NEXT:    mull 32(%ebp)
-; X86-NEXT:    addl %ebx, %eax
-; X86-NEXT:    movzbl %cl, %ecx
-; X86-NEXT:    adcl %ecx, %edx
-; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT:    movl 12(%ebp), %ecx
-; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    movl 16(%ebp), %esi
-; X86-NEXT:    sbbl (%esp), %esi # 4-byte Folded Reload
-; X86-NEXT:    movl 20(%ebp), %edi
-; X86-NEXT:    sbbl %eax, %edi
-; X86-NEXT:    movl 24(%ebp), %ebx
-; X86-NEXT:    sbbl %edx, %ebx
-; X86-NEXT:    movl 8(%ebp), %eax
-; X86-NEXT:    movl %ecx, (%eax)
-; X86-NEXT:    movl %esi, 4(%eax)
-; X86-NEXT:    movl %edi, 8(%eax)
-; X86-NEXT:    movl %ebx, 12(%eax)
-; X86-NEXT:    leal -12(%ebp), %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
-; X86-NEXT:    popl %ebx
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl $4
+; X86 doesn't have __divti3, so the sdiv is expanded into a loop.
+; X86: udiv-do-while
  ;
  ; X64-LABEL: scalar_i128:
  ; X64:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll

index 6643ada2f42b4451364a087c825b9ca6024d36b5..67650ec1a6e3fdaf8ec6eb5ab2099f0cd71b1903 100644 (file)
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -171,101 +171,8 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
  
  define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
  ; X86-LABEL: scalar_i128:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $40, %esp
-; X86-NEXT:    movl 44(%ebp), %edi
-; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    pushl 40(%ebp)
-; X86-NEXT:    pushl 36(%ebp)
-; X86-NEXT:    pushl 32(%ebp)
-; X86-NEXT:    pushl 28(%ebp)
-; X86-NEXT:    pushl 24(%ebp)
-; X86-NEXT:    pushl 20(%ebp)
-; X86-NEXT:    pushl 16(%ebp)
-; X86-NEXT:    pushl 12(%ebp)
-; X86-NEXT:    pushl %eax
-; X86-NEXT:    calll __udivti3
-; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %edi, %edx
-; X86-NEXT:    movl %ecx, 12(%edi)
-; X86-NEXT:    movl %esi, 8(%edi)
-; X86-NEXT:    movl %eax, 4(%edi)
-; X86-NEXT:    movl %eax, %edi
-; X86-NEXT:    movl %ebx, (%edx)
-; X86-NEXT:    movl 28(%ebp), %eax
-; X86-NEXT:    imull %eax, %ecx
-; X86-NEXT:    mull %esi
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    addl %ecx, %edx
-; X86-NEXT:    imull 32(%ebp), %esi
-; X86-NEXT:    addl %edx, %esi
-; X86-NEXT:    movl 36(%ebp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    imull %edi, %ecx
-; X86-NEXT:    mull %ebx
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    addl %ecx, %edx
-; X86-NEXT:    movl 40(%ebp), %eax
-; X86-NEXT:    imull %ebx, %eax
-; X86-NEXT:    addl %edx, %eax
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT:    adcl %esi, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %ebx, %eax
-; X86-NEXT:    movl 28(%ebp), %ecx
-; X86-NEXT:    mull %ecx
-; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edi, %eax
-; X86-NEXT:    mull %ecx
-; X86-NEXT:    movl %edx, %ecx
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    addl (%esp), %esi # 4-byte Folded Reload
-; X86-NEXT:    adcl $0, %ecx
-; X86-NEXT:    movl %ebx, %eax
-; X86-NEXT:    mull 32(%ebp)
-; X86-NEXT:    movl %edx, %ebx
-; X86-NEXT:    addl %esi, %eax
-; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT:    adcl %ecx, %ebx
-; X86-NEXT:    setb %cl
-; X86-NEXT:    movl %edi, %eax
-; X86-NEXT:    mull 32(%ebp)
-; X86-NEXT:    addl %ebx, %eax
-; X86-NEXT:    movzbl %cl, %ecx
-; X86-NEXT:    adcl %ecx, %edx
-; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT:    movl 12(%ebp), %ecx
-; X86-NEXT:    subl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    movl 16(%ebp), %esi
-; X86-NEXT:    sbbl (%esp), %esi # 4-byte Folded Reload
-; X86-NEXT:    movl 20(%ebp), %edi
-; X86-NEXT:    sbbl %eax, %edi
-; X86-NEXT:    movl 24(%ebp), %ebx
-; X86-NEXT:    sbbl %edx, %ebx
-; X86-NEXT:    movl 8(%ebp), %eax
-; X86-NEXT:    movl %ecx, (%eax)
-; X86-NEXT:    movl %esi, 4(%eax)
-; X86-NEXT:    movl %edi, 8(%eax)
-; X86-NEXT:    movl %ebx, 12(%eax)
-; X86-NEXT:    leal -12(%ebp), %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
-; X86-NEXT:    popl %ebx
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl $4
+; X86 doesn't have __udivti3, so the udiv is expanded into a loop.
+; X86: udiv-do-while
  ;
  ; X64-LABEL: scalar_i128:
  ; X64:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/i128-sdiv.ll b/llvm/test/CodeGen/X86/i128-sdiv.ll

index 5e0c79a2297940697925516db5e6ff0b47d242be..717f52f198ee8821c1664c3c270b237820af2f65 100644 (file)
--- a/llvm/test/CodeGen/X86/i128-sdiv.ll
+++ b/llvm/test/CodeGen/X86/i128-sdiv.ll
@@ -107,40 +107,8 @@ define i128 @test2(i128 %x) nounwind {
  
  define i128 @test3(i128 %x) nounwind {
  ; X86-LABEL: test3:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    movl 8(%ebp), %esi
-; X86-NEXT:    movl %esp, %eax
-; X86-NEXT:    pushl $-1
-; X86-NEXT:    pushl $-5
-; X86-NEXT:    pushl $-1
-; X86-NEXT:    pushl $-3
-; X86-NEXT:    pushl 24(%ebp)
-; X86-NEXT:    pushl 20(%ebp)
-; X86-NEXT:    pushl 16(%ebp)
-; X86-NEXT:    pushl 12(%ebp)
-; X86-NEXT:    pushl %eax
-; X86-NEXT:    calll __divti3
-; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    movl (%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %edi, 12(%esi)
-; X86-NEXT:    movl %edx, 8(%esi)
-; X86-NEXT:    movl %ecx, 4(%esi)
-; X86-NEXT:    movl %eax, (%esi)
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    leal -8(%ebp), %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl $4
+; X86 doesn't have __divti3, so the sdiv is expanded into a loop.
+; X86: udiv-do-while
  ;
  ; X64-LABEL: test3:
  ; X64:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/i128-udiv.ll b/llvm/test/CodeGen/X86/i128-udiv.ll

index 05049dc6254a6ce0f42c4b186abea12a18edcdc8..3f890b7f2443a94acfb26f207a43189df4ad8c35 100644 (file)
--- a/llvm/test/CodeGen/X86/i128-udiv.ll
+++ b/llvm/test/CodeGen/X86/i128-udiv.ll
@@ -31,40 +31,8 @@ define i128 @test1(i128 %x) nounwind {
  
  define i128 @test2(i128 %x) nounwind {
  ; X86-LABEL: test2:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    movl 8(%ebp), %esi
-; X86-NEXT:    movl %esp, %eax
-; X86-NEXT:    pushl $-1
-; X86-NEXT:    pushl $-4
-; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl $0
-; X86-NEXT:    pushl 24(%ebp)
-; X86-NEXT:    pushl 20(%ebp)
-; X86-NEXT:    pushl 16(%ebp)
-; X86-NEXT:    pushl 12(%ebp)
-; X86-NEXT:    pushl %eax
-; X86-NEXT:    calll __udivti3
-; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    movl (%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %edi, 12(%esi)
-; X86-NEXT:    movl %edx, 8(%esi)
-; X86-NEXT:    movl %ecx, 4(%esi)
-; X86-NEXT:    movl %eax, (%esi)
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    leal -8(%ebp), %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl $4
+; X86 doesn't have __udivti3, so the udiv is expanded into a loop.
+; X86: udiv-do-while
  ;
  ; X64-LABEL: test2:
  ; X64:       # %bb.0:
@@ -80,40 +48,8 @@ define i128 @test2(i128 %x) nounwind {
  
  define i128 @test3(i128 %x) nounwind {
  ; X86-LABEL: test3:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    movl 8(%ebp), %esi
-; X86-NEXT:    movl %esp, %eax
-; X86-NEXT:    pushl $-1
-; X86-NEXT:    pushl $-5
-; X86-NEXT:    pushl $-1
-; X86-NEXT:    pushl $-3
-; X86-NEXT:    pushl 24(%ebp)
-; X86-NEXT:    pushl 20(%ebp)
-; X86-NEXT:    pushl 16(%ebp)
-; X86-NEXT:    pushl 12(%ebp)
-; X86-NEXT:    pushl %eax
-; X86-NEXT:    calll __udivti3
-; X86-NEXT:    addl $32, %esp
-; X86-NEXT:    movl (%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %edi, 12(%esi)
-; X86-NEXT:    movl %edx, 8(%esi)
-; X86-NEXT:    movl %ecx, 4(%esi)
-; X86-NEXT:    movl %eax, (%esi)
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    leal -8(%ebp), %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    retl $4
+; X86 doesn't have __udivti3, so the udiv is expanded into a loop.
+; X86: udiv-do-while
  ;
  ; X64-LABEL: test3:
  ; X64:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/libcall-sret.ll b/llvm/test/CodeGen/X86/libcall-sret.ll

deleted file mode 100644 (file)

index 661c631..0000000
--- a/llvm/test/CodeGen/X86/libcall-sret.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: llc -mtriple=i686-linux-gnu -o - %s | FileCheck %s
-
-@var = global i128 0
-
-; We were trying to convert the i128 operation into a libcall, but failing to
-; perform sret demotion when we couldn't return the result in registers. Make
-; sure we marshal the return properly:
-
-define void @test_sret_libcall(i128 %l, i128 %r) {
-; CHECK-LABEL: test_sret_libcall:
-
-  ; Stack for call: 4(sret ptr), 16(i128 %l), 16(128 %r). So next logical
-  ; (aligned) place for the actual sret data is %esp + 20.
-; CHECK: leal 20(%esp), [[SRET_ADDR:%[a-z]+]]
-; CHECK: pushl 72(%esp)
-; CHECK: pushl 72(%esp)
-; CHECK: pushl 72(%esp)
-; CHECK: pushl 72(%esp)
-; CHECK: pushl 72(%esp)
-; CHECK: pushl 72(%esp)
-; CHECK: pushl 72(%esp)
-; CHECK: pushl 72(%esp)
-; CHECK: pushl [[SRET_ADDR]]
-
-; CHECK: calll __udivti3
-
-; CHECK: addl $44, %esp
-; CHECK-DAG: movl 8(%esp), [[RES0:%[a-z]+]]
-; CHECK-DAG: movl 12(%esp), [[RES1:%[a-z]+]]
-; CHECK-DAG: movl 16(%esp), [[RES2:%[a-z]+]]
-; CHECK-DAG: movl 20(%esp), [[RES3:%[a-z]+]]
-; CHECK-DAG: movl [[RES0]], var
-; CHECK-DAG: movl [[RES1]], var+4
-; CHECK-DAG: movl [[RES2]], var+8
-; CHECK-DAG: movl [[RES3]], var+12
-  %quot = udiv i128 %l, %r
-  store i128 %quot, ptr @var
-  ret void
-}
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll

index 3f9acba27810f6b7d5a14037426758e4026b5e64..f9952db9d4cb542e38c4f7e7849cfcc9b0e15590 100644 (file)
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -26,6 +26,7 @@
  ; CHECK-NEXT:   ModulePass Manager
  ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
  ; CHECK-NEXT:     FunctionPass Manager
+; CHECK-NEXT:       Expand large div/rem
  ; CHECK-NEXT:       Expand Atomic instructions
  ; CHECK-NEXT:       Lower AMX intrinsics
  ; CHECK-NEXT:       Lower AMX type for load/store
diff --git a/llvm/test/CodeGen/X86/pr38539.ll b/llvm/test/CodeGen/X86/pr38539.ll

index 8736d8e91e768f26e4aaf05019d6692ddf9f2a2b..97f5985cf9092b946e0392459f86855057c16b4c 100644 (file)
--- a/llvm/test/CodeGen/X86/pr38539.ll
+++ b/llvm/test/CodeGen/X86/pr38539.ll
@@ -13,26 +13,6 @@ define void @f() {
  ; X64-NEXT:    movq %rax, (%rax)
  ; X64-NEXT:    movb $0, (%rax)
  ; X64-NEXT:    retq
-;
-; X86-LABEL: f:
-; X86:       # %bb.0: # %BB
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    movzbl (%eax), %eax
-; X86-NEXT:    cmpb $0, (%eax)
-; X86-NEXT:    setne (%eax)
-; X86-NEXT:    leal -{{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, (%eax)
-; X86-NEXT:    movb $0, (%eax)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
-; X86-NEXT:    retl
  BB:
    %A30 = alloca i66
    %L17 = load i66, ptr %A30
diff --git a/llvm/test/CodeGen/X86/udivmodei5.ll b/llvm/test/CodeGen/X86/udivmodei5.ll

new file mode 100644 (file)

index 0000000..2c30357
--- /dev/null
+++ b/llvm/test/CodeGen/X86/udivmodei5.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+; On i686, this is expanded into a loop. On x86_64, this calls __udivti3.
+define i65 @udiv65(i65 %a, i65 %b) nounwind {
+; X86-LABEL: udiv65:
+; X86-NOT:     call
+;
+; X64-LABEL: udiv65:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    andl $1, %esi
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    callq __udivti3@PLT
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %res = udiv i65 %a, %b
+  ret i65 %res
+}
+
+define i129 @udiv129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: udiv129:
+; X86-NOT:     call
+;
+; X64-LABEL: udiv129:
+; X64-NOT:     call
+  %res = udiv i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @urem129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: urem129:
+; X86-NOT:     call
+;
+; X64-LABEL: urem129:
+; X64-NOT:     call
+  %res = urem i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @sdiv129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: sdiv129:
+; X86-NOT:     call
+;
+; X64-LABEL: sdiv129:
+; X64-NOT:     call
+  %res = sdiv i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @srem129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: srem129:
+; X86-NOT:     call
+;
+; X64-LABEL: srem129:
+; X64-NOT:     call
+  %res = srem i129 %a, %b
+  ret i129 %res
+}
+
+; Some higher sizes
+define i257 @sdiv257(i257 %a, i257 %b) nounwind {
+; X86-LABEL: sdiv257:
+; X86-NOT:     call
+;
+; X64-LABEL: sdiv257:
+; X64-NOT:     call
+  %res = sdiv i257 %a, %b
+  ret i257 %res
+}
author	Matthias Gehre <matthias.gehre@xilinx.com>
	Tue, 19 Jul 2022 10:28:54 +0000 (11:28 +0100)
committer	Matthias Gehre <matthias.gehre@xilinx.com>
	Tue, 6 Sep 2022 14:32:04 +0000 (15:32 +0100)
llvm/include/llvm/Analysis/TargetTransformInfo.h		patch \| blob \| history
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h		patch \| blob \| history
llvm/lib/Analysis/TargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/CodeGen/ExpandLargeDivRem.cpp		patch \| blob \| history
llvm/lib/CodeGen/TargetPassConfig.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h		patch \| blob \| history
llvm/lib/Target/ARM/ARMTargetTransformInfo.h		patch \| blob \| history
llvm/lib/Target/X86/X86TargetTransformInfo.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86TargetTransformInfo.h		patch \| blob \| history
llvm/test/CodeGen/AArch64/O0-pipeline.ll		patch \| blob \| history
llvm/test/CodeGen/AArch64/O3-pipeline.ll		patch \| blob \| history
llvm/test/CodeGen/AArch64/udivmodei5.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/ARM/O3-pipeline.ll		patch \| blob \| history
llvm/test/CodeGen/ARM/udivmodei5.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/X86/O0-pipeline.ll		patch \| blob \| history
llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll		patch \| blob \| history
llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll		patch \| blob \| history
llvm/test/CodeGen/X86/i128-sdiv.ll		patch \| blob \| history
llvm/test/CodeGen/X86/i128-udiv.ll		patch \| blob \| history
llvm/test/CodeGen/X86/libcall-sret.ll	[deleted file]	patch \| blob \| history
llvm/test/CodeGen/X86/opt-pipeline.ll		patch \| blob \| history
llvm/test/CodeGen/X86/pr38539.ll		patch \| blob \| history
llvm/test/CodeGen/X86/udivmodei5.ll	[new file with mode: 0644]	patch \| blob