From 3e39b271016837425038e55e57c39e5e9b289975 Mon Sep 17 00:00:00 2001
From: Matthias Gehre <matthias.gehre@xilinx.com>
Date: Wed, 25 May 2022 12:19:28 +0100
Subject: [PATCH] [llvm/CodeGen] Add ExpandLargeDivRem pass

Adds a pass ExpandLargeDivRem to expand div/rem instructions
with more than 128 bits into a loop computing that value.

As discussed on https://reviews.llvm.org/D120327, this approach has the advantage
that it is independent of the runtime library. This also helps the clang driver,
which otherwise would need to understand enough about the runtime library
to know whether to allow _BitInts with more than 128 bits.

Targets are still free to disable this pass and instead provide a faster
implementation in a runtime library.

Fixes https://github.com/llvm/llvm-project/issues/44994

Differential Revision: https://reviews.llvm.org/D126644
---
 llvm/include/llvm/CodeGen/ExpandLargeDivRem.h     |  29 ++++++
 llvm/include/llvm/CodeGen/MachinePassRegistry.def |   1 +
 llvm/include/llvm/CodeGen/Passes.h                |   3 +
 llvm/include/llvm/InitializePasses.h              |   1 +
 llvm/include/llvm/LinkAllPasses.h                 |   1 +
 llvm/lib/CodeGen/CMakeLists.txt                   |   1 +
 llvm/lib/CodeGen/CodeGen.cpp                      |   1 +
 llvm/lib/CodeGen/ExpandLargeDivRem.cpp            | 112 ++++++++++++++++++++++
 llvm/lib/Transforms/Utils/IntegerDivision.cpp     |  61 ++----------
 llvm/test/CodeGen/X86/urem-seteq.ll               |  15 ---
 llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll |  67 +++++++++++++
 llvm/test/Transforms/ExpandLargeDivRem/srem129.ll |  68 +++++++++++++
 llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll |  61 ++++++++++++
 llvm/test/Transforms/ExpandLargeDivRem/urem129.ll |  63 ++++++++++++
 llvm/tools/opt/opt.cpp                            |   3 +-
 15 files changed, 420 insertions(+), 67 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/ExpandLargeDivRem.h
 create mode 100644 llvm/lib/CodeGen/ExpandLargeDivRem.cpp
 create mode 100644 llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll
 create mode 100644 llvm/test/Transforms/ExpandLargeDivRem/srem129.ll
 create mode 100644 llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll
 create mode 100644 llvm/test/Transforms/ExpandLargeDivRem/urem129.ll

diff --git a/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h b/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h
new file mode 100644
index 0000000..15a3421
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/ExpandLargeDivRem.h
@@ -0,0 +1,29 @@
+//===----- ExpandLargeDivRem.h - Expand large div/rem ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EXPANDLARGEDIVREM_H
+#define LLVM_CODEGEN_EXPANDLARGEDIVREM_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// New pass manager function pass that expands udiv/sdiv/urem/srem
+/// instructions with a bitwidth above a threshold into a loop.
+/// This is useful for backends like x86 that cannot lower divisions
+/// with more than 128 bits.
+class ExpandLargeDivRemPass : public PassInfoMixin<ExpandLargeDivRemPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  // Always required: the backend asserts when seeing large div/rem.
+  static bool isRequired() { return true; }
+};
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_EXPANDLARGEDIVREM_H
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
index 7748055f..077b713 100644
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -43,6 +43,7 @@ FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ())
 FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
 FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
 FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
+FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, ())
 FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
 FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
 FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 9822f80..a97fd5c 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -484,6 +484,9 @@ namespace llvm {
   /// predicate mask.
   FunctionPass *createExpandVectorPredicationPass();
 
+  // Expands large div/rem instructions.
+  FunctionPass *createExpandLargeDivRemPass();
+
   // This pass expands memcmp() to load/stores.
   FunctionPass *createExpandMemCmpPass();
 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 8cf31c0..77a051f 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -145,6 +145,7 @@ void initializeEarlyTailDuplicatePass(PassRegistry&);
 void initializeEdgeBundlesPass(PassRegistry&);
 void initializeEHContGuardCatchretPass(PassRegistry &);
 void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
+void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
 void initializeExpandMemCmpPassPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);
 void initializeExpandReductionsPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index e660ea0..1a96ddf 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -190,6 +190,7 @@ namespace {
       (void) llvm::createReversePostOrderFunctionAttrsPass();
       (void) llvm::createMergeFunctionsPass();
       (void) llvm::createMergeICmpsLegacyPass();
+      (void) llvm::createExpandLargeDivRemPass();
       (void) llvm::createExpandMemCmpPass();
       (void) llvm::createExpandVectorPredicationPass();
       std::string buf;
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index e106354..5ed42c7 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -52,6 +52,7 @@ add_llvm_component_library(LLVMCodeGen
   EdgeBundles.cpp
   EHContGuardCatchret.cpp
   ExecutionDomainFix.cpp
+  ExpandLargeDivRem.cpp
   ExpandMemCmp.cpp
   ExpandPostRAPseudos.cpp
   ExpandReductions.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 5050395..219032e 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -36,6 +36,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeEarlyIfPredicatorPass(Registry);
   initializeEarlyMachineLICMPass(Registry);
   initializeEarlyTailDuplicatePass(Registry);
+  initializeExpandLargeDivRemLegacyPassPass(Registry);
   initializeExpandMemCmpPassPass(Registry);
   initializeExpandPostRAPass(Registry);
   initializeFEntryInserterPass(Registry);
diff --git a/llvm/lib/CodeGen/ExpandLargeDivRem.cpp b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
new file mode 100644
index 0000000..fa1288a
--- /dev/null
+++ b/llvm/lib/CodeGen/ExpandLargeDivRem.cpp
@@ -0,0 +1,112 @@
+//===--- ExpandLargeDivRem.cpp - Expand large div/rem ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands div/rem instructions with a bitwidth above a threshold
+// into an inline loop that computes the result.
+// This is useful for targets like x86_64 that cannot lower divisions
+// with more than 128 bits or targets like x86_32 that cannot lower divisions
+// with more than 64 bits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ExpandLargeDivRem.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> // Threshold: only wider div/rem is expanded.
+    ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(128),
+                     cl::desc("div and rem instructions on integers with "
+                              "more than <N> bits are expanded."));
+
+/// Expands every scalar integer udiv/sdiv/urem/srem in \p F whose bit width
+/// exceeds ExpandDivRemBits. Returns true if anything was expanded.
+static bool runImpl(Function &F) {
+  SmallVector<BinaryOperator *, 4> Worklist;
+
+  // Gather the candidates up front so the function is not modified while its
+  // instructions are being iterated.
+  for (Instruction &Inst : instructions(F)) {
+    const unsigned Opcode = Inst.getOpcode();
+    const bool IsDivRem =
+        Opcode == Instruction::UDiv || Opcode == Instruction::SDiv ||
+        Opcode == Instruction::URem || Opcode == Instruction::SRem;
+    if (!IsDivRem)
+      continue;
+
+    // TODO: This doesn't handle vectors.
+    auto *Ty = dyn_cast<IntegerType>(Inst.getType());
+    if (Ty && Ty->getIntegerBitWidth() > ExpandDivRemBits)
+      Worklist.push_back(&cast<BinaryOperator>(Inst));
+  }
+
+  if (Worklist.empty())
+    return false;
+
+  // Candidates are popped from the back, i.e. expanded in reverse order.
+  do {
+    BinaryOperator *BO = Worklist.pop_back_val();
+    switch (BO->getOpcode()) {
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+      expandDivision(BO);
+      break;
+    default:
+      expandRemainder(BO);
+      break;
+    }
+  } while (!Worklist.empty());
+
+  return true;
+}
+
+/// New pass manager entry point. Reports all analyses as preserved when
+/// nothing was expanded, and none as preserved otherwise.
+PreservedAnalyses ExpandLargeDivRemPass::run(Function &F,
+                                             FunctionAnalysisManager &AM) {
+  if (!runImpl(F))
+    return PreservedAnalyses::all();
+
+  return PreservedAnalyses::none();
+}
+
+namespace {
+/// Legacy pass manager wrapper that forwards to runImpl().
+class ExpandLargeDivRemLegacyPass : public FunctionPass {
+public:
+  static char ID;
+  ExpandLargeDivRemLegacyPass() : FunctionPass(ID) {
+    initializeExpandLargeDivRemLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnFunction(Function &F) override { return runImpl(F); }
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addPreserved<AAResultsWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
+} // end anonymous namespace
+
+char ExpandLargeDivRemLegacyPass::ID = 0;
+// No dependent passes are registered for this pass, so the single-macro
+// INITIALIZE_PASS form is used.
+INITIALIZE_PASS(ExpandLargeDivRemLegacyPass, "expand-large-div-rem",
+                "Expand large div/rem", false, false)
+
+FunctionPass *llvm::createExpandLargeDivRemPass() {
+  return new ExpandLargeDivRemLegacyPass(); // legacy-PM pass instance
+}
diff --git a/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/llvm/lib/Transforms/Utils/IntegerDivision.cpp
index 47ab30f..1267360 100644
--- a/llvm/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -32,14 +32,7 @@ using namespace llvm;
 static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
                                           IRBuilder<> &Builder) {
   unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
-  ConstantInt *Shift;
-
-  if (BitWidth == 64) {
-    Shift = Builder.getInt64(63);
-  } else {
-    assert(BitWidth == 32 && "Unexpected bit width");
-    Shift = Builder.getInt32(31);
-  }
+  ConstantInt *Shift = Builder.getIntN(BitWidth, BitWidth - 1);
 
   // Following instructions are generated for both i32 (shift 31) and
   // i64 (shift 63).
@@ -104,14 +97,7 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
   // Implementation taken from compiler-rt's __divsi3 and __divdi3
 
   unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
-  ConstantInt *Shift;
-
-  if (BitWidth == 64) {
-    Shift = Builder.getInt64(63);
-  } else {
-    assert(BitWidth == 32 && "Unexpected bit width");
-    Shift = Builder.getInt32(31);
-  }
+  ConstantInt *Shift = Builder.getIntN(BitWidth, BitWidth - 1);
 
   // Following instructions are generated for both i32 (shift 31) and
   // i64 (shift 63).
@@ -156,23 +142,10 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
   IntegerType *DivTy = cast<IntegerType>(Dividend->getType());
   unsigned BitWidth = DivTy->getBitWidth();
 
-  ConstantInt *Zero;
-  ConstantInt *One;
-  ConstantInt *NegOne;
-  ConstantInt *MSB;
-
-  if (BitWidth == 64) {
-    Zero      = Builder.getInt64(0);
-    One       = Builder.getInt64(1);
-    NegOne    = ConstantInt::getSigned(DivTy, -1);
-    MSB       = Builder.getInt64(63);
-  } else {
-    assert(BitWidth == 32 && "Unexpected bit width");
-    Zero      = Builder.getInt32(0);
-    One       = Builder.getInt32(1);
-    NegOne    = ConstantInt::getSigned(DivTy, -1);
-    MSB       = Builder.getInt32(31);
-  }
+  ConstantInt *Zero = ConstantInt::get(DivTy, 0);
+  ConstantInt *One = ConstantInt::get(DivTy, 1);
+  ConstantInt *NegOne = ConstantInt::getSigned(DivTy, -1);
+  ConstantInt *MSB = ConstantInt::get(DivTy, BitWidth - 1);
 
   ConstantInt *True = Builder.getTrue();
 
@@ -367,8 +340,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
 /// Generate code to calculate the remainder of two integers, replacing Rem with
 /// the generated code. This currently generates code using the udiv expansion,
 /// but future work includes generating more specialized code, e.g. when more
-/// information about the operands are known. Implements both 32bit and 64bit
-/// scalar division.
+/// information about the operands are known.
 ///
 /// Replace Rem with generated code.
 bool llvm::expandRemainder(BinaryOperator *Rem) {
@@ -379,9 +351,6 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
   IRBuilder<> Builder(Rem);
 
   assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported");
-  assert((Rem->getType()->getIntegerBitWidth() == 32 ||
-          Rem->getType()->getIntegerBitWidth() == 64) &&
-         "Div of bitwidth other than 32 or 64 not supported");
 
   // First prepare the sign if it's a signed remainder
   if (Rem->getOpcode() == Instruction::SRem) {
@@ -421,12 +390,10 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
   return true;
 }
 
-
 /// Generate code to divide two integers, replacing Div with the generated
 /// code. This currently generates code similarly to compiler-rt's
 /// implementations, but future work includes generating more specialized code
-/// when more information about the operands are known. Implements both
-/// 32bit and 64bit scalar division.
+/// when more information about the operands are known.
 ///
 /// Replace Div with generated code.
 bool llvm::expandDivision(BinaryOperator *Div) {
@@ -437,9 +404,6 @@ bool llvm::expandDivision(BinaryOperator *Div) {
   IRBuilder<> Builder(Div);
 
   assert(!Div->getType()->isVectorTy() && "Div over vectors not supported");
-  assert((Div->getType()->getIntegerBitWidth() == 32 ||
-          Div->getType()->getIntegerBitWidth() == 64) &&
-         "Div of bitwidth other than 32 or 64 not supported");
 
   // First prepare the sign if it's a signed division
   if (Div->getOpcode() == Instruction::SDiv) {
@@ -540,9 +504,7 @@ bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
 
   unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
 
-  assert(RemTyBitWidth <= 64 && "Div of bitwidth greater than 64 not supported");
-
-  if (RemTyBitWidth == 64)
+  if (RemTyBitWidth >= 64)
     return expandRemainder(Rem);
 
   // If bitwidth smaller than 64 extend inputs, extend output and proceed
@@ -637,10 +599,7 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
 
   unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
 
-  assert(DivTyBitWidth <= 64 &&
-         "Div of bitwidth greater than 64 not supported");
-
-  if (DivTyBitWidth == 64)
+  if (DivTyBitWidth >= 64)
     return expandDivision(Div);
 
   // If bitwidth smaller than 64 extend inputs, extend output and proceed
diff --git a/llvm/test/CodeGen/X86/urem-seteq.ll b/llvm/test/CodeGen/X86/urem-seteq.ll
index 34fea7e..b606e11 100644
--- a/llvm/test/CodeGen/X86/urem-seteq.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq.ll
@@ -362,22 +362,7 @@ define i32 @test_urem_allones(i32 %X) nounwind {
 ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=34366
 define void @ossfuzz34366() {
 ; X86-LABEL: ossfuzz34366:
-; X86:       # %bb.0:
-; X86-NEXT:    movl (%eax), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $2147483647, %ecx # imm = 0x7FFFFFFF
-; X86-NEXT:    orl %eax, %ecx
-; X86-NEXT:    sete (%eax)
-; X86-NEXT:    retl
-;
 ; X64-LABEL: ossfuzz34366:
-; X64:       # %bb.0:
-; X64-NEXT:    movq (%rax), %rax
-; X64-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NEXT:    andq %rax, %rcx
-; X64-NEXT:    orq %rax, %rcx
-; X64-NEXT:    sete (%rax)
-; X64-NEXT:    retq
   %L10 = load i448, ptr undef, align 4
   %B18 = urem i448 %L10, -363419362147803445274661903944002267176820680343659030140745099590319644056698961663095525356881782780381260803133088966767300814307328
   %C13 = icmp ule i448 %B18, 0
diff --git a/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll
new file mode 100644
index 0000000..27ab1e8
--- /dev/null
+++ b/llvm/test/Transforms/ExpandLargeDivRem/sdiv129.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -expand-large-div-rem < %s | FileCheck %s
+
+define void @sdiv129(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: @sdiv129(
+; CHECK-NEXT:  _udiv-special-cases:
+; CHECK-NEXT:    [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr i129 [[A]], 128
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i129 [[TMP0]], [[A]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i129 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i129 0, [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i129 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = or i1 false, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true)
+; CHECK-NEXT:    [[TMP7:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP2]], i1 true)
+; CHECK-NEXT:    [[TMP8:%.*]] = sub i129 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ugt i129 [[TMP8]], 128
+; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP5]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i129 [[TMP8]], 128
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP10]], i129 0, i129 [[TMP2]]
+; CHECK-NEXT:    [[TMP13:%.*]] = or i1 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK:       udiv-loop-exit:
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP29:%.*]], [[UDIV_DO_WHILE:%.*]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi i129 [ [[TMP37:%.*]], [[UDIV_BB1]] ], [ [[TMP26:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = shl i129 [[TMP15]], 1
+; CHECK-NEXT:    [[TMP17:%.*]] = or i129 [[TMP14]], [[TMP16]]
+; CHECK-NEXT:    br label [[UDIV_END]]
+; CHECK:       udiv-do-while:
+; CHECK-NEXT:    [[TMP18:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP29]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP19:%.*]] = phi i129 [ [[TMP35:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP32:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP20:%.*]] = phi i129 [ [[TMP34:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP31:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = phi i129 [ [[TMP37]], [[UDIV_PREHEADER]] ], [ [[TMP26]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP22:%.*]] = shl i129 [[TMP20]], 1
+; CHECK-NEXT:    [[TMP23:%.*]] = lshr i129 [[TMP21]], 128
+; CHECK-NEXT:    [[TMP24:%.*]] = or i129 [[TMP22]], [[TMP23]]
+; CHECK-NEXT:    [[TMP25:%.*]] = shl i129 [[TMP21]], 1
+; CHECK-NEXT:    [[TMP26]] = or i129 [[TMP18]], [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = sub i129 2, [[TMP24]]
+; CHECK-NEXT:    [[TMP28:%.*]] = ashr i129 [[TMP27]], 128
+; CHECK-NEXT:    [[TMP29]] = and i129 [[TMP28]], 1
+; CHECK-NEXT:    [[TMP30:%.*]] = and i129 [[TMP28]], 3
+; CHECK-NEXT:    [[TMP31]] = sub i129 [[TMP24]], [[TMP30]]
+; CHECK-NEXT:    [[TMP32]] = add i129 [[TMP19]], -1
+; CHECK-NEXT:    [[TMP33:%.*]] = icmp eq i129 [[TMP32]], 0
+; CHECK-NEXT:    br i1 [[TMP33]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK:       udiv-preheader:
+; CHECK-NEXT:    [[TMP34]] = lshr i129 [[TMP2]], [[TMP35]]
+; CHECK-NEXT:    br label [[UDIV_DO_WHILE]]
+; CHECK:       udiv-bb1:
+; CHECK-NEXT:    [[TMP35]] = add i129 [[TMP8]], 1
+; CHECK-NEXT:    [[TMP36:%.*]] = sub i129 128, [[TMP8]]
+; CHECK-NEXT:    [[TMP37]] = shl i129 [[TMP2]], [[TMP36]]
+; CHECK-NEXT:    [[TMP38:%.*]] = icmp eq i129 [[TMP35]], 0
+; CHECK-NEXT:    br i1 [[TMP38]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK:       udiv-end:
+; CHECK-NEXT:    [[TMP39:%.*]] = phi i129 [ [[TMP17]], [[UDIV_LOOP_EXIT]] ], [ [[TMP12]], [[_UDIV_SPECIAL_CASES:%.*]] ]
+; CHECK-NEXT:    [[TMP40:%.*]] = xor i129 [[TMP39]], [[TMP3]]
+; CHECK-NEXT:    [[TMP41:%.*]] = sub i129 [[TMP40]], [[TMP3]]
+; CHECK-NEXT:    store i129 [[TMP41]], i129* [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  %a = load i129, i129* %ptr
+  %res = sdiv i129 %a, 3
+  store i129 %res, i129* %out
+  ret void
+}
diff --git a/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll
new file mode 100644
index 0000000..08df750
--- /dev/null
+++ b/llvm/test/Transforms/ExpandLargeDivRem/srem129.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -expand-large-div-rem < %s | FileCheck %s
+
+define void @test(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  _udiv-special-cases:
+; CHECK-NEXT:    [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr i129 [[A]], 128
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i129 [[A]], [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i129 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i129 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = or i1 false, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true)
+; CHECK-NEXT:    [[TMP6:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP2]], i1 true)
+; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ugt i129 [[TMP7]], 128
+; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP4]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i129 [[TMP7]], 128
+; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[TMP9]], i129 0, i129 [[TMP2]]
+; CHECK-NEXT:    [[TMP12:%.*]] = or i1 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK:       udiv-loop-exit:
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE:%.*]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i129 [ [[TMP36:%.*]], [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = shl i129 [[TMP14]], 1
+; CHECK-NEXT:    [[TMP16:%.*]] = or i129 [[TMP13]], [[TMP15]]
+; CHECK-NEXT:    br label [[UDIV_END]]
+; CHECK:       udiv-do-while:
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP28]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = phi i129 [ [[TMP34:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP31:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP19:%.*]] = phi i129 [ [[TMP33:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP30:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP20:%.*]] = phi i129 [ [[TMP36]], [[UDIV_PREHEADER]] ], [ [[TMP25]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = shl i129 [[TMP19]], 1
+; CHECK-NEXT:    [[TMP22:%.*]] = lshr i129 [[TMP20]], 128
+; CHECK-NEXT:    [[TMP23:%.*]] = or i129 [[TMP21]], [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = shl i129 [[TMP20]], 1
+; CHECK-NEXT:    [[TMP25]] = or i129 [[TMP17]], [[TMP24]]
+; CHECK-NEXT:    [[TMP26:%.*]] = sub i129 2, [[TMP23]]
+; CHECK-NEXT:    [[TMP27:%.*]] = ashr i129 [[TMP26]], 128
+; CHECK-NEXT:    [[TMP28]] = and i129 [[TMP27]], 1
+; CHECK-NEXT:    [[TMP29:%.*]] = and i129 [[TMP27]], 3
+; CHECK-NEXT:    [[TMP30]] = sub i129 [[TMP23]], [[TMP29]]
+; CHECK-NEXT:    [[TMP31]] = add i129 [[TMP18]], -1
+; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i129 [[TMP31]], 0
+; CHECK-NEXT:    br i1 [[TMP32]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK:       udiv-preheader:
+; CHECK-NEXT:    [[TMP33]] = lshr i129 [[TMP2]], [[TMP34]]
+; CHECK-NEXT:    br label [[UDIV_DO_WHILE]]
+; CHECK:       udiv-bb1:
+; CHECK-NEXT:    [[TMP34]] = add i129 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP35:%.*]] = sub i129 128, [[TMP7]]
+; CHECK-NEXT:    [[TMP36]] = shl i129 [[TMP2]], [[TMP35]]
+; CHECK-NEXT:    [[TMP37:%.*]] = icmp eq i129 [[TMP34]], 0
+; CHECK-NEXT:    br i1 [[TMP37]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK:       udiv-end:
+; CHECK-NEXT:    [[TMP38:%.*]] = phi i129 [ [[TMP16]], [[UDIV_LOOP_EXIT]] ], [ [[TMP11]], [[_UDIV_SPECIAL_CASES:%.*]] ]
+; CHECK-NEXT:    [[TMP39:%.*]] = mul i129 3, [[TMP38]]
+; CHECK-NEXT:    [[TMP40:%.*]] = sub i129 [[TMP2]], [[TMP39]]
+; CHECK-NEXT:    [[TMP41:%.*]] = xor i129 [[TMP40]], [[TMP0]]
+; CHECK-NEXT:    [[TMP42:%.*]] = sub i129 [[TMP41]], [[TMP0]]
+; CHECK-NEXT:    store i129 [[TMP42]], i129* [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  %a = load i129, i129* %ptr
+  %res = srem i129 %a, 3
+  store i129 %res, i129* %out
+  ret void
+}
diff --git a/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll b/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll
new file mode 100644
index 0000000..31705a9
--- /dev/null
+++ b/llvm/test/Transforms/ExpandLargeDivRem/udiv129.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -expand-large-div-rem < %s | FileCheck %s
+
+define void @test(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  _udiv-special-cases:
+; CHECK-NEXT:    [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = or i1 false, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i129 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt i129 [[TMP4]], 128
+; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i129 [[TMP4]], 128
+; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP6]], i129 0, i129 [[A]]
+; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK:       udiv-loop-exit:
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE:%.*]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i129 [ [[TMP33:%.*]], [[UDIV_BB1]] ], [ [[TMP22:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = shl i129 [[TMP11]], 1
+; CHECK-NEXT:    [[TMP13:%.*]] = or i129 [[TMP10]], [[TMP12]]
+; CHECK-NEXT:    br label [[UDIV_END]]
+; CHECK:       udiv-do-while:
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP25]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi i129 [ [[TMP31:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi i129 [ [[TMP30:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP27:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i129 [ [[TMP33]], [[UDIV_PREHEADER]] ], [ [[TMP22]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = shl i129 [[TMP16]], 1
+; CHECK-NEXT:    [[TMP19:%.*]] = lshr i129 [[TMP17]], 128
+; CHECK-NEXT:    [[TMP20:%.*]] = or i129 [[TMP18]], [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = shl i129 [[TMP17]], 1
+; CHECK-NEXT:    [[TMP22]] = or i129 [[TMP14]], [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = sub i129 2, [[TMP20]]
+; CHECK-NEXT:    [[TMP24:%.*]] = ashr i129 [[TMP23]], 128
+; CHECK-NEXT:    [[TMP25]] = and i129 [[TMP24]], 1
+; CHECK-NEXT:    [[TMP26:%.*]] = and i129 [[TMP24]], 3
+; CHECK-NEXT:    [[TMP27]] = sub i129 [[TMP20]], [[TMP26]]
+; CHECK-NEXT:    [[TMP28]] = add i129 [[TMP15]], -1
+; CHECK-NEXT:    [[TMP29:%.*]] = icmp eq i129 [[TMP28]], 0
+; CHECK-NEXT:    br i1 [[TMP29]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK:       udiv-preheader:
+; CHECK-NEXT:    [[TMP30]] = lshr i129 [[A]], [[TMP31]]
+; CHECK-NEXT:    br label [[UDIV_DO_WHILE]]
+; CHECK:       udiv-bb1:
+; CHECK-NEXT:    [[TMP31]] = add i129 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP32:%.*]] = sub i129 128, [[TMP4]]
+; CHECK-NEXT:    [[TMP33]] = shl i129 [[A]], [[TMP32]]
+; CHECK-NEXT:    [[TMP34:%.*]] = icmp eq i129 [[TMP31]], 0
+; CHECK-NEXT:    br i1 [[TMP34]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK:       udiv-end:
+; CHECK-NEXT:    [[TMP35:%.*]] = phi i129 [ [[TMP13]], [[UDIV_LOOP_EXIT]] ], [ [[TMP8]], [[_UDIV_SPECIAL_CASES:%.*]] ]
+; CHECK-NEXT:    store i129 [[TMP35]], i129* [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  %a = load i129, i129* %ptr
+  %res = udiv i129 %a, 3
+  store i129 %res, i129* %out
+  ret void
+}
diff --git a/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll b/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll
new file mode 100644
index 0000000..4e91eef
--- /dev/null
+++ b/llvm/test/Transforms/ExpandLargeDivRem/urem129.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -expand-large-div-rem < %s | FileCheck %s
+
+define void @test(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  _udiv-special-cases:
+; CHECK-NEXT:    [[A:%.*]] = load i129, i129* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = or i1 false, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i129 @llvm.ctlz.i129(i129 3, i1 true)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i129 @llvm.ctlz.i129(i129 [[A]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i129 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt i129 [[TMP4]], 128
+; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i129 [[TMP4]], 128
+; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[TMP6]], i129 0, i129 [[A]]
+; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK:       udiv-loop-exit:
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE:%.*]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i129 [ [[TMP33:%.*]], [[UDIV_BB1]] ], [ [[TMP22:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = shl i129 [[TMP11]], 1
+; CHECK-NEXT:    [[TMP13:%.*]] = or i129 [[TMP10]], [[TMP12]]
+; CHECK-NEXT:    br label [[UDIV_END]]
+; CHECK:       udiv-do-while:
+; CHECK-NEXT:    [[TMP14:%.*]] = phi i129 [ 0, [[UDIV_PREHEADER:%.*]] ], [ [[TMP25]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi i129 [ [[TMP31:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi i129 [ [[TMP30:%.*]], [[UDIV_PREHEADER]] ], [ [[TMP27:%.*]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i129 [ [[TMP33]], [[UDIV_PREHEADER]] ], [ [[TMP22]], [[UDIV_DO_WHILE]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = shl i129 [[TMP16]], 1
+; CHECK-NEXT:    [[TMP19:%.*]] = lshr i129 [[TMP17]], 128
+; CHECK-NEXT:    [[TMP20:%.*]] = or i129 [[TMP18]], [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = shl i129 [[TMP17]], 1
+; CHECK-NEXT:    [[TMP22]] = or i129 [[TMP14]], [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = sub i129 2, [[TMP20]]
+; CHECK-NEXT:    [[TMP24:%.*]] = ashr i129 [[TMP23]], 128
+; CHECK-NEXT:    [[TMP25]] = and i129 [[TMP24]], 1
+; CHECK-NEXT:    [[TMP26:%.*]] = and i129 [[TMP24]], 3
+; CHECK-NEXT:    [[TMP27]] = sub i129 [[TMP20]], [[TMP26]]
+; CHECK-NEXT:    [[TMP28]] = add i129 [[TMP15]], -1
+; CHECK-NEXT:    [[TMP29:%.*]] = icmp eq i129 [[TMP28]], 0
+; CHECK-NEXT:    br i1 [[TMP29]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK:       udiv-preheader:
+; CHECK-NEXT:    [[TMP30]] = lshr i129 [[A]], [[TMP31]]
+; CHECK-NEXT:    br label [[UDIV_DO_WHILE]]
+; CHECK:       udiv-bb1:
+; CHECK-NEXT:    [[TMP31]] = add i129 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP32:%.*]] = sub i129 128, [[TMP4]]
+; CHECK-NEXT:    [[TMP33]] = shl i129 [[A]], [[TMP32]]
+; CHECK-NEXT:    [[TMP34:%.*]] = icmp eq i129 [[TMP31]], 0
+; CHECK-NEXT:    br i1 [[TMP34]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK:       udiv-end:
+; CHECK-NEXT:    [[TMP35:%.*]] = phi i129 [ [[TMP13]], [[UDIV_LOOP_EXIT]] ], [ [[TMP8]], [[_UDIV_SPECIAL_CASES:%.*]] ]
+; CHECK-NEXT:    [[TMP36:%.*]] = mul i129 3, [[TMP35]]
+; CHECK-NEXT:    [[TMP37:%.*]] = sub i129 [[A]], [[TMP36]]
+; CHECK-NEXT:    store i129 [[TMP37]], i129* [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  %a = load i129, i129* %ptr
+  %res = urem i129 %a, 3
+  store i129 %res, i129* %out
+  ret void
+}
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index 0837dd6..70b3b54 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -455,7 +455,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
       "replace-with-veclib",  "jmc-instrument",
       "dot-regions",          "dot-regions-only",
       "view-regions",         "view-regions-only",
-      "select-optimize"};
+      "select-optimize",      "expand-large-div-rem"};
   for (const auto &P : PassNamePrefix)
     if (Pass.startswith(P))
       return true;
@@ -504,6 +504,7 @@ int main(int argc, char **argv) {
   initializeTarget(Registry);
   // For codegen passes, only passes that do IR to IR transformation are
   // supported.
+  initializeExpandLargeDivRemLegacyPassPass(Registry);
   initializeExpandMemCmpPassPass(Registry);
   initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
   initializeSelectOptimizePass(Registry);
-- 
2.7.4