From 400ad6f95d7a8d425463571f5c24efc562677ce7 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Wed, 8 Apr 2020 12:57:02 -0700
Subject: [PATCH] [mlir] Eliminate the remaining usages of cl::opt instead of
 PassOption.

Summary: Pass options are a better choice for various reasons and avoid the need for static constructors.

Differential Revision: https://reviews.llvm.org/D77707
---
 llvm/include/llvm/Support/CommandLine.h            |   4 +-
 mlir/include/mlir/Dialect/Affine/Passes.h          |   2 +-
 mlir/include/mlir/Dialect/Affine/Passes.td         |  50 ++++++++++
 mlir/include/mlir/IR/OperationSupport.h            |   3 +
 mlir/include/mlir/Pass/PassOptions.h               |  34 +++++--
 mlir/include/mlir/Transforms/Passes.td             |  33 +++++--
 mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp  |   4 +-
 .../Affine/Transforms/AffineDataCopyGeneration.cpp |  83 ++++-------------
 mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp  |  70 ++++----------
 mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp  |  48 ++++------
 .../Dialect/Affine/Transforms/LoopUnrollAndJam.cpp |  35 +------
 .../Dialect/Affine/Transforms/SuperVectorize.cpp   |   2 +-
 mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp      |   6 +-
 mlir/lib/IR/AsmPrinter.cpp                         |   5 +
 mlir/lib/Transforms/Inliner.cpp                    | 101 ++++++++++-----------
 mlir/lib/Transforms/LoopFusion.cpp                 | 101 ++++++---------------
 .../Utils/GreedyPatternRewriteDriver.cpp           |   7 +-
 mlir/lib/Transforms/ViewOpGraph.cpp                |  19 ++--
 mlir/test/Dialect/Affine/affine-data-copy.mlir     |   4 +-
 mlir/test/Dialect/Affine/dma-generate.mlir         |   4 +-
 mlir/test/Dialect/Affine/inlining.mlir             |   2 +-
 mlir/test/Dialect/Affine/loop-tiling.mlir          |   6 +-
 mlir/test/Dialect/Affine/unroll-jam.mlir           |   4 +-
 mlir/test/Dialect/Affine/unroll.mlir               |   8 +-
 mlir/test/Dialect/SPIRV/Transforms/inlining.mlir   |   2 +-
 mlir/test/Transforms/inlining.mlir                 |   6 +-
 mlir/test/Transforms/loop-fusion.mlir              |   2 +-
 mlir/test/lib/Pass/TestPassManager.cpp             |   7 +-
 28 files changed, 292 insertions(+), 360 deletions(-)
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 0242e35..2c87d8f 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -1606,8 +1606,8 @@ public:
   reference front() { return Storage.front(); }
   const_reference front() const { return Storage.front(); }
 
-  operator std::vector<DataType>&() { return Storage; }
-  operator ArrayRef<DataType>() { return Storage; }
+  operator std::vector<DataType> &() { return Storage; }
+  operator ArrayRef<DataType>() const { return Storage; }
   std::vector<DataType> *operator&() { return &Storage; }
   const std::vector<DataType> *operator&() const { return &Storage; }
 
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index 5d3b997..0d7c3be 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -59,7 +59,7 @@ std::unique_ptr<OperationPass<FuncOp>> createLoopTilingPass();
 /// and no callback is provided, anything passed from the command-line (if at
 /// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
 std::unique_ptr<OperationPass<FuncOp>> createLoopUnrollPass(
-    int unrollFactor = -1, int unrollFull = -1,
+    int unrollFactor = -1, bool unrollFull = false,
     const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
 
 /// Creates a loop unroll jam pass to unroll jam by the specified factor. A
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 8fef996..06e09204 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -18,6 +18,28 @@ include "mlir/Pass/PassBase.td"
 def AffineDataCopyGeneration : FunctionPass<"affine-data-copy-generate"> {
   let summary = "Generate explicit copying for affine memory operations";
   let constructor = "mlir::createAffineDataCopyGenerationPass()";
+  let options = [
+    Option<"fastMemoryCapacity", "fast-mem-capacity", "uint64_t",
+           /*default=*/"std::numeric_limits<uint64_t>::max()",
+           "Set fast memory space capacity in KiB (default: unlimited)">,
+    Option<"fastMemorySpace", "fast-mem-space", "unsigned",
+           /*default=*/"1",
+           "Fast memory space identifier for copy generation (default: 1)">,
+    Option<"generateDma", "generate-dma", "bool",
+           /*default=*/"true", "Generate DMA instead of point-wise copy">,
+    Option<"minDmaTransferSize", "min-dma-transfer", "int",
+           /*default=*/"1024",
+           "Minimum DMA transfer size supported by the target in bytes">,
+    Option<"slowMemorySpace", "slow-mem-space", "unsigned",
+           /*default=*/"0",
+           "Slow memory space identifier for copy generation (default: 0)">,
+    Option<"skipNonUnitStrideLoops", "skip-non-unit-stride-loops", "bool",
+           /*default=*/"false", "Testing purposes: avoid non-unit stride loop "
+                                "choice depths for copy placement">,
+    Option<"tagMemorySpace", "tag-mem-space", "unsigned",
+           /*default=*/"0",
+           "Tag memory space identifier for copy generation (default: 0)">,
+  ];
 }
 
 def AffineLoopInvariantCodeMotion
@@ -29,16 +51,44 @@ def AffineLoopInvariantCodeMotion
 def AffineLoopTiling : FunctionPass<"affine-loop-tile"> {
   let summary = "Tile affine loop nests";
   let constructor = "mlir::createLoopTilingPass()";
+  let options = [
+    Option<"cacheSizeInKiB", "cache-size", "uint64_t", /*default=*/"512",
+           "Set size of cache to tile for in KiB">,
+    Option<"separate", "separate", "bool", /*default=*/"",
+           "Separate full and partial tiles">,
+    Option<"tileSize", "tile-size", "unsigned", /*default=*/"",
+           "Use this tile size for all loops">,
+    ListOption<"tileSizes", "tile-sizes", "unsigned",
+               "List of tile sizes for each perfect nest "
+               "(overridden by -tile-size)",
+               "llvm::cl::ZeroOrMore">,
+  ];
 }
 
 def AffineLoopUnroll : FunctionPass<"affine-loop-unroll"> {
   let summary = "Unroll affine loops";
   let constructor = "mlir::createLoopUnrollPass()";
+  let options = [
+    Option<"unrollFactor", "unroll-factor", "unsigned", /*default=*/"4",
+           "Use this unroll factor for all loops being unrolled">,
+    Option<"unrollFull", "unroll-full", "bool", /*default=*/"false",
+           "Fully unroll loops">,
+    Option<"numRepetitions", "unroll-num-reps", "unsigned", /*default=*/"1",
+           "Unroll innermost loops repeatedly this many times">,
+    Option<"unrollFullThreshold", "unroll-full-threshold", "unsigned",
+           /*default=*/"1",
+           "Unroll all loops with trip count less than or equal to this">,
+  ];
 }
 
 def AffineLoopUnrollAndJam : FunctionPass<"affine-loop-unroll-jam"> {
   let summary = "Unroll and jam affine loops";
   let constructor = "mlir::createLoopUnrollAndJamPass()";
+  let options = [
+    Option<"unrollJamFactor", "unroll-jam-factor", "unsigned",
+           /*default=*/"4",
+           "Use this unroll jam factor for all loops (default 4)">,
+  ];
 }
 
 def AffineVectorize : FunctionPass<"affine-super-vectorize"> {
diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h
index 8928886..8d6bda3 100644
--- a/mlir/include/mlir/IR/OperationSupport.h
+++ b/mlir/include/mlir/IR/OperationSupport.h
@@ -514,6 +514,9 @@ public:
   /// Return if the given ElementsAttr should be elided.
   bool shouldElideElementsAttr(ElementsAttr attr) const;
 
+  /// Return the size limit for printing large ElementsAttr.
+  Optional<int64_t> getLargeElementsAttrLimit() const;
+
   /// Return if debug information should be printed.
   bool shouldPrintDebugInfo() const;
 
diff --git a/mlir/include/mlir/Pass/PassOptions.h b/mlir/include/mlir/Pass/PassOptions.h
index d360bad6..bb2592d 100644
--- a/mlir/include/mlir/Pass/PassOptions.h
+++ b/mlir/include/mlir/Pass/PassOptions.h
@@ -42,6 +42,9 @@ private:
     /// Return the argument string of this option.
     StringRef getArgStr() const { return getOption()->ArgStr; }
 
+    /// Returns true if this option has any value assigned to it.
+    bool hasValue() const { return optHasValue; }
+
   protected:
     /// Return the main option instance.
     virtual const llvm::cl::Option *getOption() const = 0;
@@ -49,6 +52,9 @@ private:
     /// Copy the value from the given option into this one.
     virtual void copyValueFrom(const OptionBase &other) = 0;
 
+    /// Flag indicating if this option has a value.
+    bool optHasValue = false;
+
     /// Allow access to private methods.
     friend PassOptions;
   };
@@ -113,10 +119,17 @@ public:
       assert(!this->isPositional() && !this->isSink() &&
              "sink and positional options are not supported");
       parent.options.push_back(this);
+
+      // Set a callback to track if this option has a value.
+      this->setCallback([this](const auto &) { this->optHasValue = true; });
     }
+    ~Option() override = default;
     using llvm::cl::opt<DataType, /*ExternalStorage=*/false,
                         OptionParser>::operator=;
-    ~Option() override = default;
+    Option &operator=(const Option &other) {
+      *this = other.getValue();
+      return *this;
+    }
 
   private:
     /// Return the main option instance.
@@ -132,6 +145,7 @@ public:
     void copyValueFrom(const OptionBase &other) final {
       this->setValue(static_cast<const Option<DataType, OptionParser> &>(other)
                          .getValue());
+      optHasValue = other.optHasValue;
     }
   };
 
@@ -149,16 +163,26 @@ public:
       assert(!this->isPositional() && !this->isSink() &&
              "sink and positional options are not supported");
       parent.options.push_back(this);
+
+      // Set a callback to track if this option has a value.
+      this->setCallback([this](const auto &) { this->optHasValue = true; });
     }
     ~ListOption() override = default;
+    ListOption<DataType, OptionParser> &
+    operator=(const ListOption<DataType, OptionParser> &other) {
+      *this = ArrayRef<DataType>(other);
+      this->optHasValue = other.optHasValue;
+      return *this;
+    }
 
     /// Allow assigning from an ArrayRef.
     ListOption<DataType, OptionParser> &operator=(ArrayRef<DataType> values) {
-      (*this)->assign(values.begin(), values.end());
+      ((std::vector<DataType> &)*this).assign(values.begin(), values.end());
+      optHasValue = true;
       return *this;
     }
 
-    std::vector<DataType> *operator->() { return &*this; }
+    MutableArrayRef<DataType> operator->() const { return &*this; }
 
   private:
     /// Return the main option instance.
@@ -175,9 +199,7 @@ public:
 
     /// Copy the value from the given option into this one.
     void copyValueFrom(const OptionBase &other) final {
-      (*this) = ArrayRef<DataType>(
-          (ListOption<DataType, OptionParser> &)(const_cast<OptionBase &>(
-              other)));
+      *this = static_cast<const ListOption<DataType, OptionParser> &>(other);
     }
   };
 
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 04cf087..70b8f81 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -15,6 +15,24 @@
 
 include "mlir/Pass/PassBase.td"
 
+def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> {
+  let summary = "Fuse affine loop nests";
+  let constructor = "mlir::createLoopFusionPass()";
+  let options = [
+    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
+           /*default=*/"0.30f", "Fractional increase in additional computation "
+                                "tolerated while fusing">,
+    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
+           /*default=*/"0",
+           "Faster memory space number to promote fusion buffers to">,
+    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
+           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
+                            "to fast memory space">,
+    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
+           "Enables maximal loop fusion">,
+  ];
+}
+
 def AffinePipelineDataTransfer
     : FunctionPass<"affine-pipeline-data-transfer"> {
   let summary = "Pipeline non-blocking data transfers between explicitly "
@@ -84,11 +102,6 @@ def AffinePipelineDataTransfer
   let constructor = "mlir::createPipelineDataTransferPass()";
 }
 
-def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> {
-  let summary = "Fuse affine loop nests";
-  let constructor = "mlir::createLoopFusionPass()";
-}
-
 def Canonicalizer : Pass<"canonicalize"> {
   let summary = "Canonicalize operations";
   let constructor = "mlir::createCanonicalizerPass()";
@@ -106,6 +119,14 @@ def CSE : Pass<"cse"> {
 def Inliner : Pass<"inline"> {
   let summary = "Inline function calls";
   let constructor = "mlir::createInlinerPass()";
+  let options = [
+    Option<"disableCanonicalization", "disable-simplify", "bool",
+           /*default=*/"false",
+           "Disable running simplifications during inlining">,
+    Option<"maxInliningIterations", "max-iterations", "unsigned",
+           /*default=*/"4",
+           "Maximum number of iterations when inlining within an SCC">,
+  ];
 }
 
 def LocationSnapshot : Pass<"snapshot-op-locations"> {
@@ -113,7 +134,7 @@ def LocationSnapshot : Pass<"snapshot-op-locations"> {
   let constructor = "mlir::createLocationSnapshotPass()";
   let options = [
     Option<"fileName", "filename", "std::string", /*default=*/"",
-           "The filename to print the generated IR.">,
+           "The filename to print the generated IR">,
     Option<"tag", "tag", "std::string", /*default=*/"",
            "A tag to use when fusing the new locations with the "
            "original. If unset, the locations are replaced.">,
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
index 9ea64d9..a597dd7 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
@@ -61,8 +61,8 @@ struct ImperfectlyNestedForLoopMapper
   ImperfectlyNestedForLoopMapper() = default;
   ImperfectlyNestedForLoopMapper(ArrayRef<int64_t> numWorkGroups,
                                  ArrayRef<int64_t> workGroupSize) {
-    this->numWorkGroups->assign(numWorkGroups.begin(), numWorkGroups.end());
-    this->workGroupSize->assign(workGroupSize.begin(), workGroupSize.end());
+    this->numWorkGroups = numWorkGroups;
+    this->workGroupSize = workGroupSize;
   }
 
   void runOnFunction() override {
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
index a052547..bff60f4 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
@@ -35,32 +35,6 @@
 
 using namespace mlir;
 
-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-static llvm::cl::opt<unsigned long long> clFastMemoryCapacity(
-    "affine-data-copy-generate-fast-mem-capacity",
-    llvm::cl::desc(
-        "Set fast memory space capacity in KiB (default: unlimited)"),
-    llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<bool>
-    clDma("affine-data-copy-generate-dma",
-          llvm::cl::desc("Generate DMA instead of point-wise copy"),
-          llvm::cl::cat(clOptionsCategory), llvm::cl::init(true));
-
-static llvm::cl::opt<unsigned> clFastMemorySpace(
-    "affine-data-copy-generate-fast-mem-space", llvm::cl::init(1),
-    llvm::cl::desc(
-        "Fast memory space identifier for copy generation (default: 1)"),
-    llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<bool> clSkipNonUnitStrideLoop(
-    "affine-data-copy-generate-skip-non-unit-stride-loops", llvm::cl::Hidden,
-    llvm::cl::init(false),
-    llvm::cl::desc("Testing purposes: avoid non-unit stride loop choice depths "
-                   "for copy placement"),
-    llvm::cl::cat(clOptionsCategory));
-
 namespace {
 
 /// Replaces all loads and stores on memref's living in 'slowMemorySpace' by
@@ -76,51 +50,22 @@ namespace {
 // are strided. Check for strided stores.
 struct AffineDataCopyGeneration
     : public AffineDataCopyGenerationBase<AffineDataCopyGeneration> {
-  explicit AffineDataCopyGeneration(
-      unsigned slowMemorySpace = 0,
-      unsigned fastMemorySpace = clFastMemorySpace, unsigned tagMemorySpace = 0,
-      int minDmaTransferSize = 1024,
-      uint64_t fastMemCapacityBytes =
-          (clFastMemoryCapacity.getNumOccurrences() > 0
-               ? clFastMemoryCapacity * 1024 // cl-provided size is in KiB
-               : std::numeric_limits<uint64_t>::max()),
-      bool generateDma = clDma,
-      bool skipNonUnitStrideLoops = clSkipNonUnitStrideLoop)
-      : slowMemorySpace(slowMemorySpace), fastMemorySpace(fastMemorySpace),
-        tagMemorySpace(tagMemorySpace), minDmaTransferSize(minDmaTransferSize),
-        fastMemCapacityBytes(fastMemCapacityBytes), generateDma(generateDma),
-        skipNonUnitStrideLoops(skipNonUnitStrideLoops) {}
-
-  explicit AffineDataCopyGeneration(const AffineDataCopyGeneration &other)
-      : AffineDataCopyGenerationBase<AffineDataCopyGeneration>(other),
-        slowMemorySpace(other.slowMemorySpace),
-        fastMemorySpace(other.fastMemorySpace),
-        tagMemorySpace(other.tagMemorySpace),
-        minDmaTransferSize(other.minDmaTransferSize),
-        fastMemCapacityBytes(other.fastMemCapacityBytes),
-        generateDma(other.generateDma),
-        skipNonUnitStrideLoops(other.skipNonUnitStrideLoops) {}
+  AffineDataCopyGeneration() = default;
+  explicit AffineDataCopyGeneration(unsigned slowMemorySpace,
+                                    unsigned fastMemorySpace,
+                                    unsigned tagMemorySpace,
+                                    int minDmaTransferSize,
+                                    uint64_t fastMemCapacityBytes) {
+    this->slowMemorySpace = slowMemorySpace;
+    this->fastMemorySpace = fastMemorySpace;
+    this->tagMemorySpace = tagMemorySpace;
+    this->minDmaTransferSize = minDmaTransferSize;
+    this->fastMemoryCapacity = fastMemCapacityBytes / 1024;
+  }
 
   void runOnFunction() override;
   LogicalResult runOnBlock(Block *block, DenseSet<Operation *> &copyNests);
 
-  // Slow memory space associated with copies.
-  const unsigned slowMemorySpace;
-  // Fast memory space associated with copies.
-  unsigned fastMemorySpace;
-  // Memory space associated with DMA tags.
-  unsigned tagMemorySpace;
-  // Minimum DMA transfer size supported by the target in bytes.
-  const int minDmaTransferSize;
-  // Capacity of the faster memory space.
-  uint64_t fastMemCapacityBytes;
-
-  // If set, generate DMA operations instead of read/write.
-  bool generateDma;
-
-  // If set, ignore loops with steps other than 1.
-  bool skipNonUnitStrideLoops;
-
   // Constant zero index to avoid too many duplicates.
   Value zeroIndex = nullptr;
 };
@@ -153,6 +98,10 @@ AffineDataCopyGeneration::runOnBlock(Block *block,
   if (block->empty())
     return success();
 
+  uint64_t fastMemCapacityBytes =
+      fastMemoryCapacity != std::numeric_limits<uint64_t>::max()
+          ? fastMemoryCapacity * 1024
+          : fastMemoryCapacity;
   AffineCopyOptions copyOptions = {generateDma, slowMemorySpace,
                                    fastMemorySpace, tagMemorySpace,
                                    fastMemCapacityBytes};
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
index 0e8e109..b15a7372 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
@@ -28,40 +28,15 @@ using namespace mlir;
 
 #define DEBUG_TYPE "affine-loop-tile"
 
-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-static llvm::cl::opt<unsigned long long>
-    clCacheSizeKiB("affine-tile-cache-size",
-                   llvm::cl::desc("Set size of cache to tile for in KiB"),
-                   llvm::cl::cat(clOptionsCategory));
-
-// Separate full and partial tiles.
-static llvm::cl::opt<bool>
-    clSeparate("affine-tile-separate",
-               llvm::cl::desc("Separate full and partial tiles"),
-               llvm::cl::cat(clOptionsCategory));
-
-// Tile size to use for all loops (overrides -tile-sizes if provided).
-static llvm::cl::opt<unsigned>
-    clTileSize("affine-tile-size",
-               llvm::cl::desc("Use this tile size for all loops"),
-               llvm::cl::cat(clOptionsCategory));
-
-// List of tile sizes. If any of them aren't provided, they are filled with
-// clTileSize / kDefaultTileSize.
-static llvm::cl::list<unsigned> clTileSizes(
-    "affine-tile-sizes",
-    llvm::cl::desc(
-        "List of tile sizes for each perfect nest (overridden by -tile-size)"),
-    llvm::cl::ZeroOrMore, llvm::cl::cat(clOptionsCategory));
-
 namespace {
 
 /// A pass to perform loop tiling on all suitable loop nests of a Function.
 struct LoopTiling : public AffineLoopTilingBase<LoopTiling> {
-  explicit LoopTiling(uint64_t cacheSizeBytes = kDefaultCacheMemCapacity,
-                      bool avoidMaxMinBounds = true)
-      : cacheSizeBytes(cacheSizeBytes), avoidMaxMinBounds(avoidMaxMinBounds) {}
+  LoopTiling() = default;
+  explicit LoopTiling(uint64_t cacheSizeBytes, bool avoidMaxMinBounds = true)
+      : avoidMaxMinBounds(avoidMaxMinBounds) {
+    this->cacheSizeInKiB = cacheSizeBytes / 1024;
+  }
 
   void runOnFunction() override;
   void getTileSizes(ArrayRef<AffineForOp> band,
@@ -69,12 +44,9 @@ struct LoopTiling : public AffineLoopTilingBase<LoopTiling> {
 
   // Default tile size if nothing is provided.
   constexpr static unsigned kDefaultTileSize = 4;
-  constexpr static uint64_t kDefaultCacheMemCapacity = 512 * 1024UL;
 
-  // Capacity of the cache to tile for.
-  uint64_t cacheSizeBytes;
   // If true, tile sizes are set to avoid max/min in bounds if possible.
-  bool avoidMaxMinBounds;
+  bool avoidMaxMinBounds = true;
 };
 
 } // end anonymous namespace
@@ -316,23 +288,19 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
   if (band.empty())
     return;
 
-  tileSizes->resize(band.size());
-
-  // Use clTileSize for all loops if specified.
-  if (clTileSize.getNumOccurrences() > 0) {
-    std::fill(tileSizes->begin(), tileSizes->end(), clTileSize);
+  // Use tileSize for all loops if specified.
+  if (tileSize.hasValue()) {
+    tileSizes->assign(band.size(), tileSize);
     return;
   }
 
-  // Use clTileSizes and fill them with default tile size if it's short.
-  if (!clTileSizes.empty()) {
-    std::fill(tileSizes->begin(), tileSizes->end(),
-              LoopTiling::kDefaultTileSize);
-    std::copy(clTileSizes.begin(),
-              clTileSizes.begin() + std::min(clTileSizes.size(), band.size()),
-              tileSizes->begin());
+  // Use tileSizes and fill them with default tile size if it's short.
+  if (!this->tileSizes.empty()) {
+    tileSizes->assign(this->tileSizes.begin(), this->tileSizes.end());
+    tileSizes->resize(band.size(), kDefaultTileSize);
     return;
   }
+  tileSizes->resize(band.size());
 
   // The first loop in the band.
   auto rootForOp = band[0];
@@ -356,6 +324,7 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
   }
 
   // Check how many times larger the cache size is when compared to footprint.
+  uint64_t cacheSizeBytes = cacheSizeInKiB * 1024;
   uint64_t excessFactor = llvm::divideCeil(fp.getValue(), cacheSizeBytes);
   if (excessFactor <= 1) {
     // No need of any tiling - set tile size to 1.
@@ -388,10 +357,6 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
 }
 
 void LoopTiling::runOnFunction() {
-  // Override cache size if provided on command line.
-  if (clCacheSizeKiB.getNumOccurrences() > 0)
-    cacheSizeBytes = clCacheSizeKiB * 1024;
-
   // Bands of loops to tile.
   std::vector<SmallVector<AffineForOp, 6>> bands;
   getTileableBands(getFunction(), &bands);
@@ -399,7 +364,7 @@ void LoopTiling::runOnFunction() {
   // Tile each band.
   for (auto &band : bands) {
     // Set up tile sizes; fill missing tile sizes at the end with default tile
-    // size or clTileSize if one was provided.
+    // size or tileSize if one was provided.
     SmallVector<unsigned, 6> tileSizes;
     getTileSizes(band, &tileSizes);
     if (llvm::DebugFlag) {
@@ -413,7 +378,7 @@ void LoopTiling::runOnFunction() {
       return signalPassFailure();
 
     // Separate full and partial tiles.
-    if (clSeparate) {
+    if (separate) {
       auto intraTileLoops =
           MutableArrayRef<AffineForOp>(tiledNest).drop_front(band.size());
       separateFullTiles(intraTileLoops);
@@ -422,4 +387,3 @@ void LoopTiling::runOnFunction() {
 }
 
 constexpr unsigned LoopTiling::kDefaultTileSize;
-constexpr uint64_t LoopTiling::kDefaultCacheMemCapacity;
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index f02cf36..1dcbc81 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -59,24 +59,27 @@ namespace {
 /// with trip count less than the specified threshold. The latter is for testing
 /// purposes, especially for testing outer loop unrolling.
 struct LoopUnroll : public AffineLoopUnrollBase<LoopUnroll> {
-  const Optional<unsigned> unrollFactor;
-  const Optional<bool> unrollFull;
   // Callback to obtain unroll factors; if this has a callable target, takes
   // precedence over command-line argument or passed argument.
   const std::function<unsigned(AffineForOp)> getUnrollFactor;
 
+  LoopUnroll() : getUnrollFactor(nullptr) {}
+  LoopUnroll(const LoopUnroll &other)
+      : AffineLoopUnrollBase<LoopUnroll>(other),
+        getUnrollFactor(other.getUnrollFactor) {}
   explicit LoopUnroll(
-      Optional<unsigned> unrollFactor = None, Optional<bool> unrollFull = None,
+      Optional<unsigned> unrollFactor = None, bool unrollFull = false,
       const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr)
-      : unrollFactor(unrollFactor), unrollFull(unrollFull),
-        getUnrollFactor(getUnrollFactor) {}
+      : getUnrollFactor(getUnrollFactor) {
+    if (unrollFactor)
+      this->unrollFactor = *unrollFactor;
+    this->unrollFull = unrollFull;
+  }
 
   void runOnFunction() override;
 
   /// Unroll this for op. Returns failure if nothing was done.
   LogicalResult runOnAffineForOp(AffineForOp forOp);
-
-  static const unsigned kDefaultUnrollFactor = 4;
 };
 } // end anonymous namespace
 
@@ -102,8 +105,7 @@ static void gatherInnermostLoops(FuncOp f,
 }
 
 void LoopUnroll::runOnFunction() {
-  if (clUnrollFull.getNumOccurrences() > 0 &&
-      clUnrollFullThreshold.getNumOccurrences() > 0) {
+  if (unrollFull && unrollFullThreshold.hasValue()) {
     // Store short loops as we walk.
     SmallVector<AffineForOp, 4> loops;
 
@@ -112,7 +114,7 @@ void LoopUnroll::runOnFunction() {
     // an outer one may delete gathered inner ones).
     getFunction().walk([&](AffineForOp forOp) {
       Optional<uint64_t> tripCount = getConstantTripCount(forOp);
-      if (tripCount.hasValue() && tripCount.getValue() <= clUnrollFullThreshold)
+      if (tripCount.hasValue() && tripCount.getValue() <= unrollFullThreshold)
         loops.push_back(forOp);
     });
     for (auto forOp : loops)
@@ -120,9 +122,6 @@ void LoopUnroll::runOnFunction() {
     return;
   }
 
-  unsigned numRepetitions = clUnrollNumRepetitions.getNumOccurrences() > 0
-                                ? clUnrollNumRepetitions
-                                : 1;
   // If the call back is provided, we will recurse until no loops are found.
   FuncOp func = getFunction();
   SmallVector<AffineForOp, 4> loops;
@@ -144,28 +143,19 @@ void LoopUnroll::runOnFunction() {
 /// failure otherwise. The default unroll factor is 4.
 LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
   // Use the function callback if one was provided.
-  if (getUnrollFactor) {
+  if (getUnrollFactor)
     return loopUnrollByFactor(forOp, getUnrollFactor(forOp));
-  }
-  // Unroll by the factor passed, if any.
-  if (unrollFactor.hasValue())
-    return loopUnrollByFactor(forOp, unrollFactor.getValue());
-  // Unroll by the command line factor if one was specified.
-  if (clUnrollFactor.getNumOccurrences() > 0)
-    return loopUnrollByFactor(forOp, clUnrollFactor);
   // Unroll completely if full loop unroll was specified.
-  if (clUnrollFull.getNumOccurrences() > 0 ||
-      (unrollFull.hasValue() && unrollFull.getValue()))
+  if (unrollFull)
     return loopUnrollFull(forOp);
-
-  // Unroll by four otherwise.
-  return loopUnrollByFactor(forOp, kDefaultUnrollFactor);
+  // Otherwise, unroll by the given unroll factor.
+  return loopUnrollByFactor(forOp, unrollFactor);
 }
 
 std::unique_ptr<OperationPass<FuncOp>> mlir::createLoopUnrollPass(
-    int unrollFactor, int unrollFull,
+    int unrollFactor, bool unrollFull,
     const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
   return std::make_unique<LoopUnroll>(
-      unrollFactor == -1 ? None : Optional<unsigned>(unrollFactor),
-      unrollFull == -1 ? None : Optional<bool>(unrollFull), getUnrollFactor);
+      unrollFactor == -1 ? None : Optional<unsigned>(unrollFactor), unrollFull,
+      getUnrollFactor);
 }
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
index dffc7c0..28830da 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
@@ -49,27 +49,16 @@ using namespace mlir;
 
 #define DEBUG_TYPE "affine-loop-unroll-jam"
 
-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-// Loop unroll and jam factor.
-static llvm::cl::opt<unsigned>
-    clUnrollJamFactor("unroll-jam-factor", llvm::cl::Hidden,
-                      llvm::cl::desc("Use this unroll jam factor for all loops"
-                                     " (default 4)"),
-                      llvm::cl::cat(clOptionsCategory));
-
 namespace {
 /// Loop unroll jam pass. Currently, this just unroll jams the first
 /// outer loop in a Function.
 struct LoopUnrollAndJam : public AffineLoopUnrollAndJamBase<LoopUnrollAndJam> {
-  Optional<unsigned> unrollJamFactor;
-  static const unsigned kDefaultUnrollJamFactor = 4;
-
-  explicit LoopUnrollAndJam(Optional<unsigned> unrollJamFactor = None)
-      : unrollJamFactor(unrollJamFactor) {}
+  explicit LoopUnrollAndJam(Optional<unsigned> unrollJamFactor = None) {
+    if (unrollJamFactor)
+      this->unrollJamFactor = *unrollJamFactor;
+  }
 
   void runOnFunction() override;
-  LogicalResult runOnAffineForOp(AffineForOp forOp);
 };
 } // end anonymous namespace
 
@@ -85,19 +74,5 @@ void LoopUnrollAndJam::runOnFunction() {
   // any for operation.
   auto &entryBlock = getFunction().front();
   if (auto forOp = dyn_cast<AffineForOp>(entryBlock.front()))
-    runOnAffineForOp(forOp);
-}
-
-/// Unroll and jam a 'affine.for' op. Default unroll jam factor is
-/// kDefaultUnrollJamFactor. Return failure if nothing was done.
-LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) {
-  // Unroll and jam by the factor that was passed if any.
-  if (unrollJamFactor.hasValue())
-    return loopUnrollJamByFactor(forOp, unrollJamFactor.getValue());
-  // Otherwise, unroll jam by the command-line factor if one was specified.
-  if (clUnrollJamFactor.getNumOccurrences() > 0)
-    return loopUnrollJamByFactor(forOp, clUnrollJamFactor);
-
-  // Unroll and jam by four otherwise.
-  return loopUnrollJamByFactor(forOp, kDefaultUnrollJamFactor);
+    loopUnrollJamByFactor(forOp, unrollJamFactor);
 }
diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
index d6b2bd1..620b200 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -582,7 +582,7 @@ struct Vectorize : public AffineVectorizeBase<Vectorize> {
 } // end anonymous namespace
 
 Vectorize::Vectorize(ArrayRef<int64_t> virtualVectorSize) {
-  vectorSizes->assign(virtualVectorSize.begin(), virtualVectorSize.end());
+  vectorSizes = virtualVectorSize;
 }
 
 /////// TODO(ntv): Hoist to a VectorizationStrategy.cpp when appropriate.
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index cda2d08..997895b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -508,9 +508,7 @@ static void tileLinalgOps(FuncOp f, ArrayRef<int64_t> tileSizes) {
 namespace {
 struct LinalgTilingPass : public LinalgTilingBase<LinalgTilingPass> {
   LinalgTilingPass() = default;
-  LinalgTilingPass(ArrayRef<int64_t> sizes) {
-    tileSizes->assign(sizes.begin(), sizes.end());
-  }
+  LinalgTilingPass(ArrayRef<int64_t> sizes) { tileSizes = sizes; }
 
   void runOnFunction() override {
     tileLinalgOps<loop::ForOp>(getFunction(), tileSizes);
@@ -521,7 +519,7 @@ struct LinalgTilingToParallelLoopsPass
     : public LinalgTilingToParallelLoopsBase<LinalgTilingToParallelLoopsPass> {
   LinalgTilingToParallelLoopsPass() = default;
   LinalgTilingToParallelLoopsPass(ArrayRef<int64_t> sizes) {
-    tileSizes->assign(sizes.begin(), sizes.end());
+    tileSizes = sizes;
   }
 
   void runOnFunction() override {
diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index 498c23a..beaf4c0 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -146,6 +146,11 @@ bool OpPrintingFlags::shouldElideElementsAttr(ElementsAttr attr) const {
          *elementsAttrElementLimit < int64_t(attr.getNumElements());
 }
 
+/// Return the size limit for printing large ElementsAttr.
+Optional<int64_t> OpPrintingFlags::getLargeElementsAttrLimit() const {
+  return elementsAttrElementLimit;
+}
+
 /// Return if debug information should be printed.
 bool OpPrintingFlags::shouldPrintDebugInfo() const {
   return printDebugInfoFlag;
diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp
index 582f720..8ee4996 100644
--- a/mlir/lib/Transforms/Inliner.cpp
+++ b/mlir/lib/Transforms/Inliner.cpp
@@ -27,16 +27,6 @@
 
 using namespace mlir;
 
-static llvm::cl::opt<bool> disableCanonicalization(
-    "mlir-disable-inline-simplify",
-    llvm::cl::desc("Disable running simplifications during inlining"),
-    llvm::cl::ReallyHidden, llvm::cl::init(false));
-
-static llvm::cl::opt<unsigned> maxInliningIterations(
-    "mlir-max-inline-iterations",
-    llvm::cl::desc("Maximum number of iterations when inlining within an SCC"),
-    llvm::cl::ReallyHidden, llvm::cl::init(4));
-
 //===----------------------------------------------------------------------===//
 // Symbol Use Tracking
 //===----------------------------------------------------------------------===//
@@ -563,13 +553,55 @@ static void canonicalizeSCC(CallGraph &cg, CGUseList &useList,
     useList.recomputeUses(node, cg);
 }
 
-/// Attempt to inline calls within the given scc, and run canonicalizations with
-/// the given patterns, until a fixed point is reached. This allows for the
-/// inlining of newly devirtualized calls.
-static void inlineSCC(Inliner &inliner, CGUseList &useList,
-                      MutableArrayRef<CallGraphNode *> currentSCC,
-                      MLIRContext *context,
-                      const OwningRewritePatternList &canonPatterns) {
+//===----------------------------------------------------------------------===//
+// InlinerPass
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct InlinerPass : public InlinerBase<InlinerPass> {
+  void runOnOperation() override;
+
+  /// Attempt to inline calls within the given scc, and run canonicalizations
+  /// with the given patterns, until a fixed point is reached. This allows for
+  /// the inlining of newly devirtualized calls.
+  void inlineSCC(Inliner &inliner, CGUseList &useList,
+                 MutableArrayRef<CallGraphNode *> currentSCC,
+                 MLIRContext *context,
+                 const OwningRewritePatternList &canonPatterns);
+};
+} // end anonymous namespace
+
+void InlinerPass::runOnOperation() {
+  CallGraph &cg = getAnalysis<CallGraph>();
+  auto *context = &getContext();
+
+  // The inliner should only be run on operations that define a symbol table,
+  // as the callgraph will need to resolve references.
+  Operation *op = getOperation();
+  if (!op->hasTrait<OpTrait::SymbolTable>()) {
+    op->emitOpError() << " was scheduled to run under the inliner, but does "
+                         "not define a symbol table";
+    return signalPassFailure();
+  }
+
+  // Collect a set of canonicalization patterns to use when simplifying
+  // callable regions within an SCC.
+  OwningRewritePatternList canonPatterns;
+  for (auto *op : context->getRegisteredOperations())
+    op->getCanonicalizationPatterns(canonPatterns, context);
+
+  // Run the inline transform in post-order over the SCCs in the callgraph.
+  Inliner inliner(context, cg);
+  CGUseList useList(getOperation(), cg);
+  runTransformOnCGSCCs(cg, [&](MutableArrayRef<CallGraphNode *> scc) {
+    inlineSCC(inliner, useList, scc, context, canonPatterns);
+  });
+}
+
+void InlinerPass::inlineSCC(Inliner &inliner, CGUseList &useList,
+                            MutableArrayRef<CallGraphNode *> currentSCC,
+                            MLIRContext *context,
+                            const OwningRewritePatternList &canonPatterns) {
   // If we successfully inlined any calls, run some simplifications on the
   // nodes of the scc. Continue attempting to inline until we reach a fixed
   // point, or a maximum iteration count. We canonicalize here as it may
@@ -584,41 +616,6 @@ static void inlineSCC(Inliner &inliner, CGUseList &useList,
   }
 }
 
-//===----------------------------------------------------------------------===//
-// InlinerPass
-//===----------------------------------------------------------------------===//
-
-namespace {
-struct InlinerPass : public InlinerBase<InlinerPass> {
-  void runOnOperation() override {
-    CallGraph &cg = getAnalysis<CallGraph>();
-    auto *context = &getContext();
-
-    // The inliner should only be run on operations that define a symbol table,
-    // as the callgraph will need to resolve references.
-    Operation *op = getOperation();
-    if (!op->hasTrait<OpTrait::SymbolTable>()) {
-      op->emitOpError() << " was scheduled to run under the inliner, but does "
-                           "not define a symbol table";
-      return signalPassFailure();
-    }
-
-    // Collect a set of canonicalization patterns to use when simplifying
-    // callable regions within an SCC.
-    OwningRewritePatternList canonPatterns;
-    for (auto *op : context->getRegisteredOperations())
-      op->getCanonicalizationPatterns(canonPatterns, context);
-
-    // Run the inline transform in post-order over the SCCs in the callgraph.
-    Inliner inliner(context, cg);
-    CGUseList useList(getOperation(), cg);
-    runTransformOnCGSCCs(cg, [&](MutableArrayRef<CallGraphNode *> scc) {
-      inlineSCC(inliner, useList, scc, context, canonPatterns);
-    });
-  }
-};
-} // end anonymous namespace
-
 std::unique_ptr<Pass> mlir::createInlinerPass() {
   return std::make_unique<InlinerPass>();
 }
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp
index 47ee502..1486f2c 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Transforms/LoopFusion.cpp
@@ -37,36 +37,6 @@ using llvm::SetVector;
 
 using namespace mlir;
 
-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-/// Disables fusion profitability check and fuses if valid. Ignore any
-/// additional (redundant) computation tolerance threshold
-/// that would have prevented fusion.
-static llvm::cl::opt<bool>
-    clMaximalLoopFusion("fusion-maximal",
-                        llvm::cl::desc("Enables maximal loop fusion"),
-                        llvm::cl::cat(clOptionsCategory));
-
-/// A threshold in percent of additional computation allowed when fusing.
-static llvm::cl::opt<double> clFusionAddlComputeTolerance(
-    "fusion-compute-tolerance",
-    llvm::cl::desc("Fractional increase in additional "
-                   "computation tolerated while fusing"),
-    llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<unsigned> clFusionFastMemorySpace(
-    "fusion-fast-mem-space",
-    llvm::cl::desc("Faster memory space number to promote fusion buffers to"),
-    llvm::cl::cat(clOptionsCategory));
-
-// A local buffer of size less than or equal to this size is automatically
-// promoted to fast memory after producer-consumer fusion.
-static llvm::cl::opt<unsigned long long> clFusionLocalBufThreshold(
-    "fusion-local-buf-threshold",
-    llvm::cl::desc("Threshold size (KiB) for promoting local buffers to fast "
-                   "memory space"),
-    llvm::cl::cat(clOptionsCategory));
-
 namespace {
 /// Loop fusion pass. This pass currently supports a greedy fusion policy,
 /// which fuses loop nests with single-writer/single-reader memref dependences
@@ -78,24 +48,15 @@ namespace {
 // and add support for more general loop fusion algorithms.
 
 struct LoopFusion : public AffineLoopFusionBase<LoopFusion> {
-  LoopFusion(unsigned fastMemorySpace = 0, uint64_t localBufSizeThreshold = 0,
-             bool maximalFusion = false)
-      : localBufSizeThreshold(localBufSizeThreshold),
-        fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion) {}
+  LoopFusion() = default;
+  LoopFusion(unsigned fastMemorySpace, uint64_t localBufSizeThresholdBytes,
+             bool maximalFusion) {
+    this->fastMemorySpace = fastMemorySpace;
+    this->localBufSizeThreshold = localBufSizeThresholdBytes / 1024;
+    this->maximalFusion = maximalFusion;
+  }
 
   void runOnFunction() override;
-
-  // Any local buffers smaller than this size (in bytes) will be created in
-  // `fastMemorySpace` if provided.
-  uint64_t localBufSizeThreshold;
-  Optional<unsigned> fastMemorySpace = None;
-  // If true, ignore any additional (redundant) computation tolerance threshold
-  // that would have prevented fusion.
-  bool maximalFusion;
-
-  // The amount of additional computation that is tolerated while fusing
-  // pair-wise as a fraction of the total computation.
-  constexpr static double kComputeToleranceThreshold = 0.30f;
 };
 
 } // end anonymous namespace
@@ -1098,7 +1059,8 @@ static bool isFusionProfitable(Operation *srcOpInst, Operation *srcStoreOpInst,
                                ArrayRef<Operation *> dstLoadOpInsts,
                                ArrayRef<Operation *> dstStoreOpInsts,
                                ComputationSliceState *sliceState,
-                               unsigned *dstLoopDepth, bool maximalFusion) {
+                               unsigned *dstLoopDepth, bool maximalFusion,
+                               double computeToleranceThreshold) {
   LLVM_DEBUG({
     llvm::dbgs() << "Checking whether fusion is profitable between:\n";
     llvm::dbgs() << " " << *srcOpInst << " and \n";
@@ -1247,11 +1209,6 @@ static bool isFusionProfitable(Operation *srcOpInst, Operation *srcStoreOpInst,
       llvm::dbgs() << msg.str();
     });
 
-    double computeToleranceThreshold =
-        clFusionAddlComputeTolerance.getNumOccurrences() > 0
-            ? clFusionAddlComputeTolerance
-            : LoopFusion::kComputeToleranceThreshold;
-
     // TODO(b/123247369): This is a placeholder cost model.
     // Among all choices that add an acceptable amount of redundant computation
     // (as per computeToleranceThreshold), we will simply pick the one that
@@ -1426,13 +1383,18 @@ public:
   // If true, ignore any additional (redundant) computation tolerance threshold
   // that would have prevented fusion.
   bool maximalFusion;
+  // The amount of additional computation that is tolerated while fusing
+  // pair-wise as a fraction of the total computation.
+  double computeToleranceThreshold;
 
   using Node = MemRefDependenceGraph::Node;
 
   GreedyFusion(MemRefDependenceGraph *mdg, unsigned localBufSizeThreshold,
-               Optional<unsigned> fastMemorySpace, bool maximalFusion)
+               Optional<unsigned> fastMemorySpace, bool maximalFusion,
+               double computeToleranceThreshold)
       : mdg(mdg), localBufSizeThreshold(localBufSizeThreshold),
-        fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion) {}
+        fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion),
+        computeToleranceThreshold(computeToleranceThreshold) {}
 
   // Initializes 'worklist' with nodes from 'mdg'
   void init() {
@@ -1608,7 +1570,8 @@ public:
           // Check if fusion would be profitable.
           if (!isFusionProfitable(srcStoreOp, srcStoreOp, dstLoadOpInsts,
                                   dstStoreOpInsts, &sliceState,
-                                  &bestDstLoopDepth, maximalFusion))
+                                  &bestDstLoopDepth, maximalFusion,
+                                  computeToleranceThreshold))
             continue;
 
           // Fuse computation slice of 'srcLoopNest' into 'dstLoopNest'.
@@ -1769,7 +1732,7 @@ public:
       // Check if fusion would be profitable.
       if (!isFusionProfitable(sibLoadOpInst, sibStoreOpInst, dstLoadOpInsts,
                               dstStoreOpInsts, &sliceState, &bestDstLoopDepth,
-                              maximalFusion))
+                              maximalFusion, computeToleranceThreshold))
         continue;
 
       // Fuse computation slice of 'sibLoopNest' into 'dstLoopNest'.
@@ -1954,21 +1917,15 @@ public:
 } // end anonymous namespace
 
 void LoopFusion::runOnFunction() {
-  // Override if a command line argument was provided.
-  if (clFusionFastMemorySpace.getNumOccurrences() > 0) {
-    fastMemorySpace = clFusionFastMemorySpace.getValue();
-  }
-
-  // Override if a command line argument was provided.
-  if (clFusionLocalBufThreshold.getNumOccurrences() > 0) {
-    localBufSizeThreshold = clFusionLocalBufThreshold * 1024;
-  }
-
-  if (clMaximalLoopFusion.getNumOccurrences() > 0)
-    maximalFusion = clMaximalLoopFusion;
-
   MemRefDependenceGraph g;
-  if (g.init(getFunction()))
-    GreedyFusion(&g, localBufSizeThreshold, fastMemorySpace, maximalFusion)
-        .run();
+  if (!g.init(getFunction()))
+    return;
+
+  Optional<unsigned> fastMemorySpaceOpt;
+  if (fastMemorySpace.hasValue())
+    fastMemorySpaceOpt = fastMemorySpace;
+  unsigned localBufSizeThresholdBytes = localBufSizeThreshold * 1024;
+  GreedyFusion fusion(&g, localBufSizeThresholdBytes, fastMemorySpaceOpt,
+                      maximalFusion, computeToleranceThreshold);
+  fusion.run();
 }
diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
index 79b3db6..258df35 100644
--- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
+++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
@@ -23,13 +23,10 @@ using namespace mlir;
 
 #define DEBUG_TYPE "pattern-matcher"
 
-static llvm::cl::opt<unsigned> maxPatternMatchIterations(
-    "mlir-max-pattern-match-iterations",
-    llvm::cl::desc("Max number of iterations scanning for pattern match"),
-    llvm::cl::init(10));
+/// The max number of iterations scanning for pattern match.
+static unsigned maxPatternMatchIterations = 10;
 
 namespace {
-
 /// This is a worklist-driven driver for the PatternMatcher, which repeatedly
 /// applies the locally optimal patterns in a roughly "bottom up" way.
 class GreedyPatternRewriteDriver : public PatternRewriter {
diff --git a/mlir/lib/Transforms/ViewOpGraph.cpp b/mlir/lib/Transforms/ViewOpGraph.cpp
index 41e33e8..6ead363 100644
--- a/mlir/lib/Transforms/ViewOpGraph.cpp
+++ b/mlir/lib/Transforms/ViewOpGraph.cpp
@@ -14,13 +14,16 @@
 #include "mlir/Support/STLExtras.h"
 #include "llvm/Support/CommandLine.h"
 
-static llvm::cl::opt<int> elideIfLarger(
-    "print-op-graph-elide-if-larger",
-    llvm::cl::desc("Upper limit to emit elements attribute rather than elide"),
-    llvm::cl::init(16));
-
 using namespace mlir;
 
+/// Return the size limits for eliding large attributes.
+static int64_t getLargeAttributeSizeLimit() {
+  // Use the default from the printer flags if possible.
+  if (Optional<int64_t> limit = OpPrintingFlags().getLargeElementsAttrLimit())
+    return *limit;
+  return 16;
+}
+
 namespace llvm {
 
 // Specialize GraphTraits to treat Block as a graph of Operations as nodes and
@@ -65,6 +68,8 @@ std::string DOTGraphTraits<Block *>::getNodeLabel(Operation *op, Block *b) {
   interleaveComma(op->getResultTypes(), os);
   os << "\n";
 
+  // A value used to elide large container attribute.
+  int64_t largeAttrLimit = getLargeAttributeSizeLimit();
   for (auto attr : op->getAttrs()) {
     os << '\n' << attr.first << ": ";
     // Always emit splat attributes.
@@ -75,7 +80,7 @@ std::string DOTGraphTraits<Block *>::getNodeLabel(Operation *op, Block *b) {
 
     // Elide "big" elements attributes.
     auto elements = attr.second.dyn_cast<ElementsAttr>();
-    if (elements && elements.getNumElements() > elideIfLarger) {
+    if (elements && elements.getNumElements() > largeAttrLimit) {
       os << std::string(elements.getType().getRank(), '[') << "..."
          << std::string(elements.getType().getRank(), ']') << " : "
          << elements.getType();
@@ -83,7 +88,7 @@ std::string DOTGraphTraits<Block *>::getNodeLabel(Operation *op, Block *b) {
     }
 
     auto array = attr.second.dyn_cast<ArrayAttr>();
-    if (array && static_cast<int64_t>(array.size()) > elideIfLarger) {
+    if (array && static_cast<int64_t>(array.size()) > largeAttrLimit) {
       os << "[...]";
       continue;
     }
diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir
index de8889e..52c60d7 100644
--- a/mlir/test/Dialect/Affine/affine-data-copy.mlir
+++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-skip-non-unit-stride-loops | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate="generate-dma=false fast-mem-space=0 skip-non-unit-stride-loops" | FileCheck %s
 // Small buffer size to trigger fine copies.
-// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-fast-mem-capacity=1 | FileCheck --check-prefix=CHECK-SMALL %s
+// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate="generate-dma=false fast-mem-space=0 fast-mem-capacity=1" | FileCheck --check-prefix=CHECK-SMALL %s
 
 // Test affine data copy with a memref filter. We use a test pass that invokes
 // affine data copy utility on the input loop nest.
diff --git a/mlir/test/Dialect/Affine/dma-generate.mlir b/mlir/test/Dialect/Affine/dma-generate.mlir
index 6afbb16..3572b0a 100644
--- a/mlir/test/Dialect/Affine/dma-generate.mlir
+++ b/mlir/test/Dialect/Affine/dma-generate.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma -affine-data-copy-generate-fast-mem-space=2 -affine-data-copy-generate-skip-non-unit-stride-loops -verify-diagnostics | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma -affine-data-copy-generate-fast-mem-capacity=16 -affine-data-copy-generate-fast-mem-space=2 | FileCheck %s --check-prefix FAST-MEM-16KB
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-space=2 skip-non-unit-stride-loops" -verify-diagnostics | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-capacity=16 fast-mem-space=2" | FileCheck %s --check-prefix FAST-MEM-16KB
 
 // We run most test cases with -copy-skip-non-unit-stride-loops to allow testing
 // DMA generation at inner levels easily - since the DMA generation would
diff --git a/mlir/test/Dialect/Affine/inlining.mlir b/mlir/test/Dialect/Affine/inlining.mlir
index a83ef37..e65ae5d 100644
--- a/mlir/test/Dialect/Affine/inlining.mlir
+++ b/mlir/test/Dialect/Affine/inlining.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -inline -mlir-disable-inline-simplify | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -inline="disable-simplify" | FileCheck %s
 
 // Basic test that functions within affine operations are inlined.
 func @func_with_affine_ops(%N: index) {
diff --git a/mlir/test/Dialect/Affine/loop-tiling.mlir b/mlir/test/Dialect/Affine/loop-tiling.mlir
index 110233a..075ab0c 100644
--- a/mlir/test/Dialect/Affine/loop-tiling.mlir
+++ b/mlir/test/Dialect/Affine/loop-tiling.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file  -affine-loop-tile -affine-tile-size=32 | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-cache-size=512 | FileCheck %s --check-prefix=MODEL
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-size=32 -affine-tile-separate | FileCheck %s --check-prefix=SEPARATE
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="tile-size=32" | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="cache-size=512" | FileCheck %s --check-prefix=MODEL
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="tile-size=32 separate" | FileCheck %s --check-prefix=SEPARATE
 
 // -----
 
diff --git a/mlir/test/Dialect/Affine/unroll-jam.mlir b/mlir/test/Dialect/Affine/unroll-jam.mlir
index 9d23f7a..b1513be 100644
--- a/mlir/test/Dialect/Affine/unroll-jam.mlir
+++ b/mlir/test/Dialect/Affine/unroll-jam.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam -unroll-jam-factor=2 | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam -unroll-jam-factor=4 | FileCheck --check-prefix=UJAM-FOUR %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=2" | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=4" | FileCheck --check-prefix=UJAM-FOUR %s
 
 // CHECK-DAG: [[MAP_PLUS_1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
 // CHECK-DAG: [[MAP_DIV_OFFSET:#map[0-9]+]] = affine_map<()[s0] -> (((s0 - 1) floordiv 2) * 2 + 1)>
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index 2681fdc..cd9fb63 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-full | FileCheck %s --check-prefix UNROLL-FULL
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-full -unroll-full-threshold=2 | FileCheck %s --check-prefix SHORT
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-factor=4 | FileCheck %s --check-prefix UNROLL-BY-4
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-factor=1 | FileCheck %s --check-prefix UNROLL-BY-1
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full" | FileCheck %s --check-prefix UNROLL-FULL
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1
 
 // UNROLL-FULL-DAG: [[MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
 // UNROLL-FULL-DAG: [[MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)>
diff --git a/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir b/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir
index fc188c3..24b22c5 100644
--- a/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir
+++ b/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -pass-pipeline='spv.module(inline)' -mlir-disable-inline-simplify | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -pass-pipeline='spv.module(inline{disable-simplify})' | FileCheck %s
 
 spv.module Logical GLSL450 {
   spv.func @callee() "None" {
diff --git a/mlir/test/Transforms/inlining.mlir b/mlir/test/Transforms/inlining.mlir
index 980b3c3..d62400f 100644
--- a/mlir/test/Transforms/inlining.mlir
+++ b/mlir/test/Transforms/inlining.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify | FileCheck %s
-// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify -mlir-print-debuginfo | FileCheck %s --check-prefix INLINE-LOC
-// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify=false | FileCheck %s --check-prefix INLINE_SIMPLIFY
+// RUN: mlir-opt %s -inline="disable-simplify" | FileCheck %s
+// RUN: mlir-opt %s -inline="disable-simplify" -mlir-print-debuginfo | FileCheck %s --check-prefix INLINE-LOC
+// RUN: mlir-opt %s -inline | FileCheck %s --check-prefix INLINE_SIMPLIFY
 
 // Inline a function that takes an argument.
 func @func_with_arg(%c : i32) -> i32 {
diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir
index d1e1587..d19aa5e 100644
--- a/mlir/test/Transforms/loop-fusion.mlir
+++ b/mlir/test/Transforms/loop-fusion.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion -split-input-file | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion -fusion-maximal -split-input-file | FileCheck %s --check-prefix=MAXIMAL
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion="fusion-maximal" -split-input-file | FileCheck %s --check-prefix=MAXIMAL
 
 // TODO(andydavis) Add more tests:
 // *) Add nested fusion test cases when non-constant loop bound support is
diff --git a/mlir/test/lib/Pass/TestPassManager.cpp b/mlir/test/lib/Pass/TestPassManager.cpp
index ffac1b1..f6ac0de 100644
--- a/mlir/test/lib/Pass/TestPassManager.cpp
+++ b/mlir/test/lib/Pass/TestPassManager.cpp
@@ -35,10 +35,9 @@ public:
   TestOptionsPass() = default;
   TestOptionsPass(const TestOptionsPass &) {}
   TestOptionsPass(const Options &options) {
-    listOption->assign(options.listOption.begin(), options.listOption.end());
-    stringOption.setValue(options.stringOption);
-    stringListOption->assign(options.stringListOption.begin(),
-                             options.stringListOption.end());
+    listOption = options.listOption;
+    stringOption = options.stringOption;
+    stringListOption = options.stringListOption;
   }
 
   void runOnFunction() final {}
-- 
2.7.4