--- /dev/null
+//===----- ExpandLargeDivRem.h - Expand large div/rem ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EXPANDLARGEDIVREM_H
+#define LLVM_CODEGEN_EXPANDLARGEDIVREM_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Expands div/rem instructions with a bitwidth above a threshold
+/// into a loop.
+/// This is useful for backends like x86 that cannot lower divisions
+/// with more than 128 bits.
+///
+/// This is the PassInfoMixin-based (new pass manager) form of the pass.
+class ExpandLargeDivRemPass : public PassInfoMixin<ExpandLargeDivRemPass> {
+public:
+ /// Runs the expansion over \p F and returns the set of analyses that are
+ /// still valid afterwards. \p AM is the analysis manager for \p F.
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ // The backend asserts when seeing large div/rem instructions.
+ // For that reason the pass reports itself as required.
+ static bool isRequired() { return true; }
+};
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_EXPANDLARGEDIVREM_H
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
+FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, ())
FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
/// predicate mask.
FunctionPass *createExpandVectorPredicationPass();
+ // Expands large div/rem instructions.
+ FunctionPass *createExpandLargeDivRemPass();
+
// This pass expands memcmp() to load/stores.
FunctionPass *createExpandMemCmpPass();
void initializeEdgeBundlesPass(PassRegistry&);
void initializeEHContGuardCatchretPass(PassRegistry &);
void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
+void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
void initializeExpandMemCmpPassPass(PassRegistry&);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
(void) llvm::createReversePostOrderFunctionAttrsPass();
(void) llvm::createMergeFunctionsPass();
(void) llvm::createMergeICmpsLegacyPass();
+ (void) llvm::createExpandLargeDivRemPass();
(void) llvm::createExpandMemCmpPass();
(void) llvm::createExpandVectorPredicationPass();
std::string buf;
EdgeBundles.cpp
EHContGuardCatchret.cpp
ExecutionDomainFix.cpp
+ ExpandLargeDivRem.cpp
ExpandMemCmp.cpp
ExpandPostRAPseudos.cpp
ExpandReductions.cpp
initializeEarlyIfPredicatorPass(Registry);
initializeEarlyMachineLICMPass(Registry);
initializeEarlyTailDuplicatePass(Registry);
+ initializeExpandLargeDivRemLegacyPassPass(Registry);
initializeExpandMemCmpPassPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFEntryInserterPass(Registry);
--- /dev/null
+//===--- ExpandLargeDivRem.cpp - Expand large div/rem ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands div/rem instructions with a bitwidth above a threshold
+// into an inline, loop-based sequence (see expandDivision/expandRemainder).
+// This is useful for targets like x86_64 that cannot lower divisions
+// with more than 128 bits or targets like x86_32 that cannot lower divisions
+// with more than 64 bits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ExpandLargeDivRem.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+
+using namespace llvm;
+
+// Bit-width threshold for the expansion. runImpl() only expands a div/rem
+// whose integer result type is strictly wider than this value. The default is
+// 128; it can be changed on the command line with -expand-div-rem-bits=<N>.
+static cl::opt<unsigned>
+ ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(128),
+ cl::desc("div and rem instructions on integers with "
+ "more than <N> bits are expanded."));
+
+/// Finds every scalar integer udiv/sdiv/urem/srem in \p F whose result type is
+/// wider than ExpandDivRemBits and expands it with expandDivision() or
+/// expandRemainder().
+///
+/// \returns true if at least one instruction was expanded, false otherwise.
+static bool runImpl(Function &F) {
+  SmallVector<BinaryOperator *, 4> Worklist;
+
+  // Phase 1: collect the candidates. Nothing is rewritten during this walk.
+  for (auto &Inst : instructions(F)) {
+    const unsigned Opcode = Inst.getOpcode();
+    const bool IsDivRem =
+        Opcode == Instruction::UDiv || Opcode == Instruction::SDiv ||
+        Opcode == Instruction::URem || Opcode == Instruction::SRem;
+    if (!IsDivRem)
+      continue;
+
+    // TODO: This doesn't handle vectors.
+    auto *ScalarTy = dyn_cast<IntegerType>(Inst.getType());
+    if (ScalarTy && ScalarTy->getIntegerBitWidth() > ExpandDivRemBits)
+      Worklist.push_back(&cast<BinaryOperator>(Inst));
+  }
+
+  // The function changes exactly when something was queued for expansion.
+  const bool Changed = !Worklist.empty();
+
+  // Phase 2: expand, taking the most recently collected instruction first.
+  while (!Worklist.empty()) {
+    BinaryOperator *BO = Worklist.pop_back_val();
+
+    switch (BO->getOpcode()) {
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+      expandDivision(BO);
+      break;
+    default:
+      // Only URem and SRem remain after the filter in phase 1.
+      expandRemainder(BO);
+      break;
+    }
+  }
+
+  return Changed;
+}
+
+/// New pass manager entry point: expands the large div/rem instructions of
+/// \p F through runImpl().
+///
+/// \returns PreservedAnalyses::none() if the function was modified and
+/// PreservedAnalyses::all() if it was left untouched.
+PreservedAnalyses ExpandLargeDivRemPass::run(Function &F,
+                                             FunctionAnalysisManager &AM) {
+  return runImpl(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+namespace {
+
+/// Legacy pass manager wrapper around runImpl().
+///
+/// The class is kept in an anonymous namespace because it is not exposed
+/// through a header; code outside this file obtains an instance through
+/// createExpandLargeDivRemPass().
+class ExpandLargeDivRemLegacyPass : public FunctionPass {
+public:
+  /// Pass identifier handed to the FunctionPass base class.
+  static char ID;
+
+  /// Makes sure the pass is registered with the global PassRegistry before the
+  /// instance is used.
+  ExpandLargeDivRemLegacyPass() : FunctionPass(ID) {
+    initializeExpandLargeDivRemLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  /// Expands the large div/rem instructions in \p F.
+  /// \returns true if \p F was modified.
+  bool runOnFunction(Function &F) override { return runImpl(F); }
+
+  /// Declares the alias-analysis wrapper passes as preserved by this pass.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addPreserved<AAResultsWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
+
+} // end anonymous namespace
+
+// Storage for the pass identifier that the constructor passes to FunctionPass.
+char ExpandLargeDivRemLegacyPass::ID = 0;
+// Registration boilerplate for the legacy pass under the command-line name
+// "expand-large-div-rem" with the description "Expand large div/rem". No
+// dependencies are listed between the BEGIN and END macros.
+INITIALIZE_PASS_BEGIN(ExpandLargeDivRemLegacyPass, "expand-large-div-rem",
+ "Expand large div/rem", false, false)
+INITIALIZE_PASS_END(ExpandLargeDivRemLegacyPass, "expand-large-div-rem",
+ "Expand large div/rem", false, false)
+
+/// Factory for the legacy pass manager version of the pass.
+/// \returns a newly allocated ExpandLargeDivRemLegacyPass.
+FunctionPass *llvm::createExpandLargeDivRemPass() {
+ return new ExpandLargeDivRemLegacyPass();
+}
static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
- ConstantInt *Shift;
-
- if (BitWidth == 64) {
- Shift = Builder.getInt64(63);
- } else {
- assert(BitWidth == 32 && "Unexpected bit width");
- Shift = Builder.getInt32(31);
- }
+ ConstantInt *Shift = Builder.getIntN(BitWidth, BitWidth - 1);
// Following instructions are generated for both i32 (shift 31) and
// i64 (shift 63).
// Implementation taken from compiler-rt's __divsi3 and __divdi3
unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
- ConstantInt *Shift;
-
- if (BitWidth == 64) {
- Shift = Builder.getInt64(63);
- } else {
- assert(BitWidth == 32 && "Unexpected bit width");
- Shift = Builder.getInt32(31);
- }
+ ConstantInt *Shift = Builder.getIntN(BitWidth, BitWidth - 1);
// Following instructions are generated for both i32 (shift 31) and
// i64 (shift 63).
IntegerType *DivTy = cast<IntegerType>(Dividend->getType());
unsigned BitWidth = DivTy->getBitWidth();
- ConstantInt *Zero;
- ConstantInt *One;
- ConstantInt *NegOne;
- ConstantInt *MSB;
-
- if (BitWidth == 64) {
- Zero = Builder.getInt64(0);
- One = Builder.getInt64(1);
- NegOne = ConstantInt::getSigned(DivTy, -1);
- MSB = Builder.getInt64(63);
- } else {
- assert(BitWidth == 32 && "Unexpected bit width");
- Zero = Builder.getInt32(0);
- One = Builder.getInt32(1);
- NegOne = ConstantInt::getSigned(DivTy, -1);
- MSB = Builder.getInt32(31);
- }
+ ConstantInt *Zero = ConstantInt::get(DivTy, 0);
+ ConstantInt *One = ConstantInt::get(DivTy, 1);
+ ConstantInt *NegOne = ConstantInt::getSigned(DivTy, -1);
+ ConstantInt *MSB = ConstantInt::get(DivTy, BitWidth - 1);
ConstantInt *True = Builder.getTrue();
/// Generate code to calculate the remainder of two integers, replacing Rem with
/// the generated code. This currently generates code using the udiv expansion,
/// but future work includes generating more specialized code, e.g. when more
-/// information about the operands are known. Implements both 32bit and 64bit
-/// scalar division.
+/// information about the operands are known.
///
/// Replace Rem with generated code.
bool llvm::expandRemainder(BinaryOperator *Rem) {
IRBuilder<> Builder(Rem);
assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported");
- assert((Rem->getType()->getIntegerBitWidth() == 32 ||
- Rem->getType()->getIntegerBitWidth() == 64) &&
- "Div of bitwidth other than 32 or 64 not supported");
// First prepare the sign if it's a signed remainder
if (Rem->getOpcode() == Instruction::SRem) {
return true;
}
-
/// Generate code to divide two integers, replacing Div with the generated
/// code. This currently generates code similarly to compiler-rt's
/// implementations, but future work includes generating more specialized code
-/// when more information about the operands are known. Implements both
-/// 32bit and 64bit scalar division.
+/// when more information about the operands are known.
///
/// Replace Div with generated code.
bool llvm::expandDivision(BinaryOperator *Div) {
IRBuilder<> Builder(Div);
assert(!Div->getType()->isVectorTy() && "Div over vectors not supported");
- assert((Div->getType()->getIntegerBitWidth() == 32 ||
- Div->getType()->getIntegerBitWidth() == 64) &&
- "Div of bitwidth other than 32 or 64 not supported");
// First prepare the sign if it's a signed division
if (Div->getOpcode() == Instruction::SDiv) {
unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
- assert(RemTyBitWidth <= 64 && "Div of bitwidth greater than 64 not supported");
-
- if (RemTyBitWidth == 64)
+ if (RemTyBitWidth >= 64)
return expandRemainder(Rem);
// If bitwidth smaller than 64 extend inputs, extend output and proceed
unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
- assert(DivTyBitWidth <= 64 &&
- "Div of bitwidth greater than 64 not supported");
-
- if (DivTyBitWidth == 64)
+ if (DivTyBitWidth >= 64)
return expandDivision(Div);
// If bitwidth smaller than 64 extend inputs, extend output and proceed
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=34366
define void @ossfuzz34366() {
; X86-LABEL: ossfuzz34366:
-; X86: # %bb.0:
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: sete (%eax)
-; X86-NEXT: retl
-;
; X64-LABEL: ossfuzz34366:
-; X64: # %bb.0:
-; X64-NEXT: movq (%rax), %rax
-; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: sete (%rax)
-; X64-NEXT: retq
%L10 = load i448, ptr undef, align 4
%B18 = urem i448 %L10, -363419362147803445274661903944002267176820680343659030140745099590319644056698961663095525356881782780381260803133088966767300814307328
%C13 = icmp ule i448 %B18, 0
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -expand-large-div-rem < %s | FileCheck %s
+
+; Signed division of an i129 loaded from %ptr by the constant 3. The CHECK
+; lines expect the sdiv to be replaced by the udiv-* loop blocks, with
+; ashr/xor/sub sign handling before the loop and xor/sub on the quotient
+; after it.
+define void @sdiv129(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: @sdiv129(
+; CHECK-NEXT: _udiv-special-cases:
+; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i129 [[A]], 128
+; CHECK-NEXT: [[TMP1:%.*]] = xor i129 [[TMP0]], [[A]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i129 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i129 0, [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i129 [[TMP2]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = or i1 false, [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP2]], i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = sub i129 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i129 [[TMP8]], 128
+; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP5]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i129 [[TMP8]], 128
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP10]], i129 0, i129 [[TMP2]]
+; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK: udiv-loop-exit:
+; CHECK-NEXT: [[TMP14:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP29:%.*]], [[UDIV_DO_WHILE:%.*]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = phi i129 [ [[TMP37:%.*]], [[UDIV_BB1]] ], [ [[TMP26:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = shl i129 [[TMP15]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = or i129 [[TMP14]], [[TMP16]]
+; CHECK-NEXT: br label [[UDIV_END]]
+; CHECK: udiv-do-while:
+; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP29]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = phi i129 [ [[TMP35:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP32:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP34:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP31:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = phi i129 [ [[TMP37]], [[UDIV_PREHEADER]] ], [ [[TMP26]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = shl i129 [[TMP20]], 1
+; CHECK-NEXT: [[TMP23:%.*]] = lshr i129 [[TMP21]], 128
+; CHECK-NEXT: [[TMP24:%.*]] = or i129 [[TMP22]], [[TMP23]]
+; CHECK-NEXT: [[TMP25:%.*]] = shl i129 [[TMP21]], 1
+; CHECK-NEXT: [[TMP26]] = or i129 [[TMP18]], [[TMP25]]
+; CHECK-NEXT: [[TMP27:%.*]] = sub i129 2, [[TMP24]]
+; CHECK-NEXT: [[TMP28:%.*]] = ashr i129 [[TMP27]], 128
+; CHECK-NEXT: [[TMP29]] = and i129 [[TMP28]], 1
+; CHECK-NEXT: [[TMP30:%.*]] = and i129 [[TMP28]], 3
+; CHECK-NEXT: [[TMP31]] = sub i129 [[TMP24]], [[TMP30]]
+; CHECK-NEXT: [[TMP32]] = add i129 [[TMP19]], -1
+; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i129 [[TMP32]], 0
+; CHECK-NEXT: br i1 [[TMP33]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK: udiv-preheader:
+; CHECK-NEXT: [[TMP34]] = lshr i129 [[TMP2]], [[TMP35]]
+; CHECK-NEXT: br label [[UDIV_DO_WHILE]]
+; CHECK: udiv-bb1:
+; CHECK-NEXT: [[TMP35]] = add i129 [[TMP8]], 1
+; CHECK-NEXT: [[TMP36:%.*]] = sub i129 128, [[TMP8]]
+; CHECK-NEXT: [[TMP37]] = shl i129 [[TMP2]], [[TMP36]]
+; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i129 [[TMP35]], 0
+; CHECK-NEXT: br i1 [[TMP38]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK: udiv-end:
+; CHECK-NEXT: [[TMP39:%.*]] = phi i129 [ [[TMP17]], [[UDIV_LOOP_EXIT]] ], [ [[TMP12]], [[_UDIV_SPECIAL_CASES:%.*]] ]
+; CHECK-NEXT: [[TMP40:%.*]] = xor i129 [[TMP39]], [[TMP3]]
+; CHECK-NEXT: [[TMP41:%.*]] = sub i129 [[TMP40]], [[TMP3]]
+; CHECK-NEXT: store i129 [[TMP41]], i129* [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ %a = load i129, i129* %ptr
+ %res = sdiv i129 %a, 3
+ store i129 %res, i129* %out
+ ret void
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -expand-large-div-rem < %s | FileCheck %s
+
+; Signed remainder of an i129 loaded from %ptr by the constant 3. The CHECK
+; lines expect the srem to be replaced by the udiv-* loop blocks, followed in
+; udiv-end by mul/sub to form the remainder and xor/sub to apply the sign of
+; the dividend.
+define void @test(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: _udiv-special-cases:
+; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = ashr i129 [[A]], 128
+; CHECK-NEXT: [[TMP1:%.*]] = xor i129 [[A]], [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i129 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i129 [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 false, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true)
+; CHECK-NEXT: [[TMP6:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP2]], i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = sub i129 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i129 [[TMP7]], 128
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP4]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i129 [[TMP7]], 128
+; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i129 0, i129 [[TMP2]]
+; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK: udiv-loop-exit:
+; CHECK-NEXT: [[TMP13:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE:%.*]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = phi i129 [ [[TMP36:%.*]], [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = shl i129 [[TMP14]], 1
+; CHECK-NEXT: [[TMP16:%.*]] = or i129 [[TMP13]], [[TMP15]]
+; CHECK-NEXT: br label [[UDIV_END]]
+; CHECK: udiv-do-while:
+; CHECK-NEXT: [[TMP17:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP28]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[TMP34:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP31:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = phi i129 [ [[TMP33:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP30:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP36]], [[UDIV_PREHEADER]] ], [ [[TMP25]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = shl i129 [[TMP19]], 1
+; CHECK-NEXT: [[TMP22:%.*]] = lshr i129 [[TMP20]], 128
+; CHECK-NEXT: [[TMP23:%.*]] = or i129 [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = shl i129 [[TMP20]], 1
+; CHECK-NEXT: [[TMP25]] = or i129 [[TMP17]], [[TMP24]]
+; CHECK-NEXT: [[TMP26:%.*]] = sub i129 2, [[TMP23]]
+; CHECK-NEXT: [[TMP27:%.*]] = ashr i129 [[TMP26]], 128
+; CHECK-NEXT: [[TMP28]] = and i129 [[TMP27]], 1
+; CHECK-NEXT: [[TMP29:%.*]] = and i129 [[TMP27]], 3
+; CHECK-NEXT: [[TMP30]] = sub i129 [[TMP23]], [[TMP29]]
+; CHECK-NEXT: [[TMP31]] = add i129 [[TMP18]], -1
+; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i129 [[TMP31]], 0
+; CHECK-NEXT: br i1 [[TMP32]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK: udiv-preheader:
+; CHECK-NEXT: [[TMP33]] = lshr i129 [[TMP2]], [[TMP34]]
+; CHECK-NEXT: br label [[UDIV_DO_WHILE]]
+; CHECK: udiv-bb1:
+; CHECK-NEXT: [[TMP34]] = add i129 [[TMP7]], 1
+; CHECK-NEXT: [[TMP35:%.*]] = sub i129 128, [[TMP7]]
+; CHECK-NEXT: [[TMP36]] = shl i129 [[TMP2]], [[TMP35]]
+; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i129 [[TMP34]], 0
+; CHECK-NEXT: br i1 [[TMP37]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK: udiv-end:
+; CHECK-NEXT: [[TMP38:%.*]] = phi i129 [ [[TMP16]], [[UDIV_LOOP_EXIT]] ], [ [[TMP11]], [[_UDIV_SPECIAL_CASES:%.*]] ]
+; CHECK-NEXT: [[TMP39:%.*]] = mul i129 3, [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = sub i129 [[TMP2]], [[TMP39]]
+; CHECK-NEXT: [[TMP41:%.*]] = xor i129 [[TMP40]], [[TMP0]]
+; CHECK-NEXT: [[TMP42:%.*]] = sub i129 [[TMP41]], [[TMP0]]
+; CHECK-NEXT: store i129 [[TMP42]], i129* [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ %a = load i129, i129* %ptr
+ %res = srem i129 %a, 3
+ store i129 %res, i129* %out
+ ret void
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -expand-large-div-rem < %s | FileCheck %s
+
+; Unsigned division of an i129 loaded from %ptr by the constant 3. The CHECK
+; lines expect the udiv to be replaced by the udiv-* loop blocks; the phi in
+; udiv-end is the value that gets stored to %out.
+define void @test(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: _udiv-special-cases:
+; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = or i1 false, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true)
+; CHECK-NEXT: [[TMP3:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true)
+; CHECK-NEXT: [[TMP4:%.*]] = sub i129 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i129 [[TMP4]], 128
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i129 [[TMP4]], 128
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], i129 0, i129 [[A]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK: udiv-loop-exit:
+; CHECK-NEXT: [[TMP10:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE:%.*]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = phi i129 [ [[TMP33:%.*]], [[UDIV_BB1]] ], [ [[TMP22:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = shl i129 [[TMP11]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = or i129 [[TMP10]], [[TMP12]]
+; CHECK-NEXT: br label [[UDIV_END]]
+; CHECK: udiv-do-while:
+; CHECK-NEXT: [[TMP14:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP25]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = phi i129 [ [[TMP31:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = phi i129 [ [[TMP30:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP27:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = phi i129 [ [[TMP33]], [[UDIV_PREHEADER]] ], [ [[TMP22]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = shl i129 [[TMP16]], 1
+; CHECK-NEXT: [[TMP19:%.*]] = lshr i129 [[TMP17]], 128
+; CHECK-NEXT: [[TMP20:%.*]] = or i129 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = shl i129 [[TMP17]], 1
+; CHECK-NEXT: [[TMP22]] = or i129 [[TMP14]], [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = sub i129 2, [[TMP20]]
+; CHECK-NEXT: [[TMP24:%.*]] = ashr i129 [[TMP23]], 128
+; CHECK-NEXT: [[TMP25]] = and i129 [[TMP24]], 1
+; CHECK-NEXT: [[TMP26:%.*]] = and i129 [[TMP24]], 3
+; CHECK-NEXT: [[TMP27]] = sub i129 [[TMP20]], [[TMP26]]
+; CHECK-NEXT: [[TMP28]] = add i129 [[TMP15]], -1
+; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i129 [[TMP28]], 0
+; CHECK-NEXT: br i1 [[TMP29]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK: udiv-preheader:
+; CHECK-NEXT: [[TMP30]] = lshr i129 [[A]], [[TMP31]]
+; CHECK-NEXT: br label [[UDIV_DO_WHILE]]
+; CHECK: udiv-bb1:
+; CHECK-NEXT: [[TMP31]] = add i129 [[TMP4]], 1
+; CHECK-NEXT: [[TMP32:%.*]] = sub i129 128, [[TMP4]]
+; CHECK-NEXT: [[TMP33]] = shl i129 [[A]], [[TMP32]]
+; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i129 [[TMP31]], 0
+; CHECK-NEXT: br i1 [[TMP34]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK: udiv-end:
+; CHECK-NEXT: [[TMP35:%.*]] = phi i129 [ [[TMP13]], [[UDIV_LOOP_EXIT]] ], [ [[TMP8]], [[_UDIV_SPECIAL_CASES:%.*]] ]
+; CHECK-NEXT: store i129 [[TMP35]], i129* [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ %a = load i129, i129* %ptr
+ %res = udiv i129 %a, 3
+ store i129 %res, i129* %out
+ ret void
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -expand-large-div-rem < %s | FileCheck %s
+
+; Unsigned remainder of an i129 loaded from %ptr by the constant 3. The CHECK
+; lines expect the urem to be replaced by the udiv-* loop blocks, followed in
+; udiv-end by mul/sub that subtract 3 * quotient from the dividend.
+define void @test(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: _udiv-special-cases:
+; CHECK-NEXT: [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = or i1 false, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true)
+; CHECK-NEXT: [[TMP3:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true)
+; CHECK-NEXT: [[TMP4:%.*]] = sub i129 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i129 [[TMP4]], 128
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i129 [[TMP4]], 128
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], i129 0, i129 [[A]]
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK: udiv-loop-exit:
+; CHECK-NEXT: [[TMP10:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE:%.*]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = phi i129 [ [[TMP33:%.*]], [[UDIV_BB1]] ], [ [[TMP22:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = shl i129 [[TMP11]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = or i129 [[TMP10]], [[TMP12]]
+; CHECK-NEXT: br label [[UDIV_END]]
+; CHECK: udiv-do-while:
+; CHECK-NEXT: [[TMP14:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP25]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = phi i129 [ [[TMP31:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = phi i129 [ [[TMP30:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP27:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = phi i129 [ [[TMP33]], [[UDIV_PREHEADER]] ], [ [[TMP22]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = shl i129 [[TMP16]], 1
+; CHECK-NEXT: [[TMP19:%.*]] = lshr i129 [[TMP17]], 128
+; CHECK-NEXT: [[TMP20:%.*]] = or i129 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = shl i129 [[TMP17]], 1
+; CHECK-NEXT: [[TMP22]] = or i129 [[TMP14]], [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = sub i129 2, [[TMP20]]
+; CHECK-NEXT: [[TMP24:%.*]] = ashr i129 [[TMP23]], 128
+; CHECK-NEXT: [[TMP25]] = and i129 [[TMP24]], 1
+; CHECK-NEXT: [[TMP26:%.*]] = and i129 [[TMP24]], 3
+; CHECK-NEXT: [[TMP27]] = sub i129 [[TMP20]], [[TMP26]]
+; CHECK-NEXT: [[TMP28]] = add i129 [[TMP15]], -1
+; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i129 [[TMP28]], 0
+; CHECK-NEXT: br i1 [[TMP29]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK: udiv-preheader:
+; CHECK-NEXT: [[TMP30]] = lshr i129 [[A]], [[TMP31]]
+; CHECK-NEXT: br label [[UDIV_DO_WHILE]]
+; CHECK: udiv-bb1:
+; CHECK-NEXT: [[TMP31]] = add i129 [[TMP4]], 1
+; CHECK-NEXT: [[TMP32:%.*]] = sub i129 128, [[TMP4]]
+; CHECK-NEXT: [[TMP33]] = shl i129 [[A]], [[TMP32]]
+; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i129 [[TMP31]], 0
+; CHECK-NEXT: br i1 [[TMP34]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK: udiv-end:
+; CHECK-NEXT: [[TMP35:%.*]] = phi i129 [ [[TMP13]], [[UDIV_LOOP_EXIT]] ], [ [[TMP8]], [[_UDIV_SPECIAL_CASES:%.*]] ]
+; CHECK-NEXT: [[TMP36:%.*]] = mul i129 3, [[TMP35]]
+; CHECK-NEXT: [[TMP37:%.*]] = sub i129 [[A]], [[TMP36]]
+; CHECK-NEXT: store i129 [[TMP37]], i129* [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ %a = load i129, i129* %ptr
+ %res = urem i129 %a, 3
+ store i129 %res, i129* %out
+ ret void
+}
"replace-with-veclib", "jmc-instrument",
"dot-regions", "dot-regions-only",
"view-regions", "view-regions-only",
- "select-optimize"};
+ "select-optimize", "expand-large-div-rem"};
for (const auto &P : PassNamePrefix)
if (Pass.startswith(P))
return true;
initializeTarget(Registry);
// For codegen passes, only passes that do IR to IR transformation are
// supported.
+ initializeExpandLargeDivRemLegacyPassPass(Registry);
initializeExpandMemCmpPassPass(Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeSelectOptimizePass(Registry);