let description = [{
An async function is like a normal function, but supports non-blocking
await. Internally, an async function is lowered to an LLVM coroutine with
- async runtime intrinsic. It can return an async token and/or async values.
- The token represents the execution state of async function and can be used
+ async runtime intrinsics. It can return an async token and/or async values.
+ The token represents the execution state of the async function and can be used
when users want to express dependencies on some side effects, e.g.,
the token becomes available once everything in the func body is executed.
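For illustration, the token-based dependency model can be sketched with the
related `async.execute` / `async.await` ops (a minimal sketch; the payload op
`"some.work"` is a placeholder):

```mlir
// Launch the body asynchronously and obtain a token.
%token = async.execute {
  "some.work"() : () -> ()
  async.yield
}
// Block until everything in the body has executed.
async.await %token : !async.token
```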
include "mlir/IR/EnumAttr.td"
-def LayoutMapOption : I32EnumAttr<"LayoutMapOption",
+def LayoutMapOption : I32EnumAttr<"LayoutMapOption",
"option for map layout", [
I32EnumAttrCase<"InferLayoutMap", 0>,
I32EnumAttrCase<"IdentityLayoutMap", 1>,
let assemblyFormat = "$value attr-dict `:` type($complex)";
let hasFolder = 1;
let hasVerifier = 1;
-
+
let extraClassDeclaration = [{
/// Returns true if a constant operation can be built with the given value
/// and result type.
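/// For reference, with the assembly format above such a constant is spelled,
/// e.g. (a sketch, assuming this is the complex dialect `constant` op):
///   %c = complex.constant [0.1, -1.0] : complex<f64>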
// substitution.
// bound : An affine map that is used to compute the bound of the hardware
// id based on an upper bound of the number of iterations.
-def ParallelLoopDimMappingAttr
+def ParallelLoopDimMappingAttr
: GPU_Attr<"ParallelLoopDimMapping", "loop_dim_map"> {
let parameters = (ins
EnumParameter<ProcessorEnum>:$processor,
//
//===----------------------------------------------------------------------===//
//
-// Defines the attribute used to map loops to gpu.
+// Defines the attribute used to map loops to gpu.
//
//===----------------------------------------------------------------------===//
let cppNamespace = "::mlir::gpu";
}
-def GPUThreadMappingAttr
- : GPU_Attr<"GPUThreadMapping", "thread", [
+def GPUThreadMappingAttr
+ : GPU_Attr<"GPUThreadMapping", "thread", [
DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ]> {
let parameters = (ins
EnumParameter<ThreadsEnum>:$thread
let description = [{
An attribute that allows defining thread parallelism for GPU devices.
- Thread (aka work item) are grouped into a thread blocks where block may be
- described by a 1-, 2-, or 3-dimensional rectangle. This attribute indicates
- that thread parallelism is desired. It can be consumed by lowering to
+ Threads (aka work items) are grouped into thread blocks, where a block may be
+ described by a 1-, 2-, or 3-dimensional rectangle. This attribute indicates
+ that thread parallelism is desired. It can be consumed by lowering to
generate GPU code.
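For example, a lowering consuming this attribute would typically find it in a
`mapping` array on a parallel op (a sketch; the enclosing attribute name is an
assumption based on the surrounding text):

```mlir
// Distribute two parallel dimensions over GPU threads x and y.
{ mapping = [#gpu.thread<x>, #gpu.thread<y>] }
```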
}];
}
let cppNamespace = "::mlir::gpu";
}
-def GPUBlockMappingAttr : GPU_Attr<"GPUBlockMapping", "block", [
+def GPUBlockMappingAttr : GPU_Attr<"GPUBlockMapping", "block", [
DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ] > {
let parameters = (ins
EnumParameter<BlocksEnum>:$block
let description = [{
An attribute that allows defining thread block parallelism for GPU devices.
- Thread blocks (aka work-group) are grouped into a grid where grid may be
- described by a 1-, 2-, or 3-dimensional rectangle. This attribute indicates
+ Thread blocks (aka work-groups) are grouped into a grid, where the grid may be
+ described by a 1-, 2-, or 3-dimensional rectangle. This attribute indicates
that thread block parallelism is desired. It can be consumed by lowering to
generate GPU code.
}];
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/IR/OpBase.td"
-def MapNestedForeachToThreads :
+def MapNestedForeachToThreads :
Op<Transform_Dialect, "gpu.map_nested_foreach_to_threads",
- [FunctionalStyleTransformOpTrait,
+ [FunctionalStyleTransformOpTrait,
MemoryEffectsOpInterface,
TransformEachOpTrait,
TransformOpInterface]> {
Target the `gpu.launch` op and rewrite all `scf.foreach_thread` ops
nested in it to distributed `gpu.thread_id` attributes.
- The operation searches for `scf.foreach_thread` ops nested under `target`
- and maps each such op to GPU threads. Mapping is one-to-one and the
- induction variables of `scf.foreach_thread` are rewritten to
- `gpu.thread_id` according to the `mapping` attribute.
-
- Sibling `scf.foreach_thread` are supported in which case, the union of
+ The operation searches for `scf.foreach_thread` ops nested under `target`
+ and maps each such op to GPU threads. Mapping is one-to-one and the
+ induction variables of `scf.foreach_thread` are rewritten to
+ `gpu.thread_id` according to the `mapping` attribute.
+
+ Sibling `scf.foreach_thread` ops are supported, in which case the union of
the number of threads is computed and may result in predication.
- Multiple scf.foreach_thread are supported per `gpu.launch` in which case,
- the max of all the threads is computed and taken for the global
- `gpu.thread_id`. If necessary, `scf.foreach_thread` that do not use the
+ Multiple `scf.foreach_thread` ops are supported per `gpu.launch`, in which case
+ the max of all the threads is computed and taken for the global
+ `gpu.thread_id`. If necessary, `scf.foreach_thread` ops that do not use the
whole thread range result in predicated computations.
- Dynamic `scf.foreach_thread` trip counts are currently not supported.
+ Dynamic `scf.foreach_thread` trip counts are currently not supported.
Dynamic block dim sizes are currently not supported.
Only **bufferized** `scf.foreach_thread` are currently supported.
- Only `scf.foreach_thread` distributed to **at most 3 dimensions** are
+ Only `scf.foreach_thread` distributed to **at most 3 dimensions** are
currently supported.
-
+
Barriers are inserted after each scf.foreach_thread op for now.
- The operation alters the block size of the given gpu_launch using
+ The operation alters the block size of the given gpu_launch using the
blockDim argument.
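For reference, invoking this transform might look like (a sketch; the handle
name and the `blockDim` attribute spelling are assumptions based on the prose
above):

```mlir
%1 = transform.gpu.map_nested_foreach_to_threads %gpu_launch
    { blockDim = [32, 4, 1] }
```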
#### Return modes:
-
+
This operation ignores non-gpu_launch ops and drops them in the return.
- If any scf.foreach_thread with tensors is found, the transform definitely
- fails.
+ If any scf.foreach_thread with tensors is found, the transform definitely
+ fails.
- If all the scf.foreach_thread operations contained within the LaunchOp
- referred to by the `target` PDLOperation lower to GPU properly, the
+ If all the scf.foreach_thread operations contained within the LaunchOp
+ referred to by the `target` PDLOperation lower to GPU properly, the
transform succeeds. Otherwise the transform definitely fails.
-
+
The returned handle points to the same LaunchOp operand, consuming it and
- producing a new SSA value to satisfy chaining and linearity of the IR
+ producing a new SSA value to satisfy chaining and linearity of the IR
properties.
#### Example:
}
gpu.barrier
gpu.terminator
- }
+ }
```
}];
let results = (outs PDL_Operation:$result);
let assemblyFormat = "$target attr-dict";
- let extraClassDeclaration = [{
+ let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::Operation *target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::Operation *target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
-def MapForeachToBlocks :
+def MapForeachToBlocks :
Op<Transform_Dialect, "gpu.map_foreach_to_blocks",
[FunctionalStyleTransformOpTrait,
MemoryEffectsOpInterface,
TransformEachOpTrait]> {
let description = [{
Target the gpu_launch op and rewrite the top level `scf.foreach_thread`
- to distributed gpu.block_id attribute. If `generate_gpu_launch` attribute
- is set, then first generates `gpu_launch` and moves the top level
+ to distributed gpu.block_id attributes. If the `generate_gpu_launch` attribute
+ is set, the transform first generates a `gpu_launch` op and moves the top level
`scf.foreach_thread` inside.
- The operation searches top level `scf.foreach_thread` ops under
- `gpu_launch` and maps each such op to GPU blocks. Mapping is
- one-to-one and the induction variables of `scf.foreach_thread` are
+ The operation searches top level `scf.foreach_thread` ops under
+ `gpu_launch` and maps each such op to GPU blocks. Mapping is
+ one-to-one and the induction variables of `scf.foreach_thread` are
rewritten to gpu.block_id according to the `mapping` attribute.
- Dynamic, `scf.foreach_thread` trip counts are currently not supported.
+ Dynamic `scf.foreach_thread` trip counts are currently not supported.
Dynamic block dim sizes are currently not supported.
Only **bufferized** scf.foreach_thread are currently supported.
- Only scf.foreach_thread distributed to **at most 3 dimensions** are
+ Only scf.foreach_thread distributed to **at most 3 dimensions** are
currently supported.
- The operation alters the block size of the given gpu_launch using
+ The operation alters the grid size of the given gpu_launch using the
gridDim argument.
#### Return modes:
-
+
This operation ignores non-gpu_launch ops and drops them in the return.
- If any scf.foreach_thread with tensors is found, the transform definitely
- fails.
+ If any scf.foreach_thread with tensors is found, the transform definitely
+ fails.
- If all the scf.foreach_thread operations contained within the LaunchOp
- referred to by the `target` PDLOperation lower to GPU properly, the
+ If all the scf.foreach_thread operations contained within the LaunchOp
+ referred to by the `target` PDLOperation lower to GPU properly, the
transform succeeds. Otherwise the transform definitely fails.
The returned handle points to the same LaunchOp operand, consuming it and
- producing a new SSA value to satisfy chaining and linearity of the IR
+ producing a new SSA value to satisfy chaining and linearity of the IR
properties.
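For reference, invoking this transform might look like (a sketch; the handle
name and the `generate_gpu_launch` / `gridDim` attribute spellings are
assumptions based on the prose above):

```mlir
%1 = transform.gpu.map_foreach_to_blocks %func
    { generate_gpu_launch, gridDim = [128, 1, 1] }
```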
}];
let assemblyFormat = "$target attr-dict";
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::Operation *target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::Operation *target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
LLVM_DISPLocalToUnit,
LLVM_DISPDefinition,
LLVM_DISPOptimized,
- LLVM_DISPPure,
+ LLVM_DISPPure,
LLVM_DISPElemental,
LLVM_DISPRecursive,
LLVM_DISPMainSubprogram,
/// per SM for kernel functions.
static StringRef getMinctasmAttrName() { return "nvvm.minctasm"; }
- /// Get the name of the attribute used to annotate max number of
+ /// Get the name of the attribute used to annotate max number of
/// registers that can be allocated per thread.
static StringRef getMaxnregAttrName() { return "nvvm.maxnreg"; }
}];
when possible.
#### Return modes
-
+
This operation ignores non-Linalg ops and drops them in the return.
If all the operations referred to by the `target` PDLOperation decompose
properly, the transform succeeds. Otherwise the transform silently fails.
- The return handle points to only the subset of successfully produced
+ The return handle points to only the subset of successfully produced
computational operations, which can be empty.
}];
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::linalg::LinalgOp target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::linalg::LinalgOp target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
The containing op handle must be associated with exactly one payload op. The
producer op handle may be associated with multiple payload ops. This
transform fuses producers one-by-one, always picking an unspecified producer
- that has at least one use inside the containing op among the
+ that has at least one use inside the containing op among the
producers.
Note: If a producer has multiple uses inside the containing op, it is
TransformOpInterface, TransformEachOpTrait]> {
let description = [{
Transforms a named structured operation into the generic form with the
- explicit attached region.
-
+ explicit attached region.
+
#### Return modes
-
+
This operation ignores non-Linalg ops and drops them in the return.
If all the operations referred to by the `target` PDLOperation generalize
properly, the transform succeeds. Otherwise the transform silently fails.
- The return handle points to only the subset of successfully produced
+ The return handle points to only the subset of successfully produced
equivalent generic operations, which can be empty or contain the original
ops if they were already in generic form.
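For example (a sketch, assuming the op mnemonic `structured.generalize` and a
handle `%matmul` to a named Linalg op):

```mlir
%generic = transform.structured.generalize %matmul
```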
}];
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::linalg::LinalgOp target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::linalg::LinalgOp target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
using the iterator interchange attribute.
#### Return modes
-
+
This operation ignores non-linalg::Generic ops and drops them in the return.
This operation fails if the interchange attribute is invalid.
If all the operations referred to by the `target` PDLOperation interchange
- properly, the transform succeeds.
+ properly, the transform succeeds.
If any interchange fails, the transform definitely fails.
- The return handle points to only the subset of successfully produced
+ The return handle points to only the subset of successfully produced
interchanged operations, which can be empty.
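For example, swapping two iterators might look like (a sketch; the op mnemonic
and the `iterator_interchange` attribute spelling are assumptions based on the
text above):

```mlir
%1 = transform.structured.interchange %0 { iterator_interchange = [1, 0] }
```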
}];
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::linalg::GenericOp target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::linalg::GenericOp target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
- attribute: the matched op must have all specified attributes (with their
specified values).
- filter_result_type: the matched op must return exactly this one type.
-
+
Note: Only ops that satisfy all specified constraints are matched.
TODO: Extend with regions to allow a limited form of constraints.
This op traverses the ops nested under `target` and returns the handles to
all the operations that match the requirements.
- This op fails if the target is not a handle to exactly one operation.
+ This op fails if the target is not a handle to exactly one operation.
Otherwise it succeeds.
This operation does not consume the target handle and produces new handles:
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::linalg::LinalgOp target,
+ ::mlir::linalg::LinalgOp target,
::llvm::SmallVector<::mlir::Operation *> &results,
TransformState &state);
}];
provides as operation attributes.
#### Return modes
-
+
This operation ignores non-Linalg ops and drops them in the return.
This operation may produce a definiteFailure if the padding fails for any
reason.
If all the operations referred to by the `target` PDLOperation pad
properly, the transform succeeds. Otherwise the transform silently fails.
- The return handle points to only the subset of successfully produced
+ The return handle points to only the subset of successfully produced
padded operations, which can be empty.
}];
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::linalg::LinalgOp target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::linalg::LinalgOp target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
let description = [{
Promotes the specified operands of the target into a separate memory buffer.
- At this point, this transform does not allow customizing alloc/dealloc
+ At this point, this transform does not allow customizing alloc/dealloc
functions nor the behavior on copy in/out operations.
#### Return modes
-
- This operation applies to a single Linalg op that satisfies the
+
+ This operation applies to a single Linalg op that satisfies the
`promoteSubviewsPrecondition`, otherwise it fails.
-
+
If the operations referred to by the `target` PDLOperation promote
- properly, the transform succeeds.
+ properly, the transform succeeds.
- When successful, the return handle points to the $target operation that
+ When successful, the return handle points to the $target operation that
was modified inplace.
}];
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::linalg::LinalgOp target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::linalg::LinalgOp target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
scalarized (i.e. their dynamic dimensions tiled by 1).
#### Return modes:
-
+
This operation ignores non-Linalg ops and drops them in the return.
This operation produces `definiteFailure` if the scalarization fails for any
reason.
If all the operations referred to by the `target` PDLOperation scalarize
properly, the transform succeeds. Otherwise the transform silently fails.
- The return handle points to only the subset of successfully produced
+ The return handle points to only the subset of successfully produced
tiled-by-1 operations, which can be empty.
This operation does not return handles to the tiled loop.
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::linalg::LinalgOp target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::linalg::LinalgOp target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
[FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
TransformEachOpTrait, TransformOpInterface]> {
let description = [{
- Indicates that the given `target` op should be transformed with the
+ Indicates that the given `target` op should be transformed with the
`splitReduction` transformation and split factor provided as attribute.
- The `splitReduction` transformation splits the first single linalg op
- reduction into a parallel and reduction dimension.
- A new `linalg.generic` op is created to perform the rest of the reduction.
-
+ The `splitReduction` transformation splits the first single linalg op
+ reduction into a parallel and reduction dimension.
+ A new `linalg.generic` op is created to perform the rest of the reduction.
+
The transformation supports different configuration attributes:
- - split_factor: the factor by which to split (i.e. the size of the
+ - split_factor: the factor by which to split (i.e. the size of the
remaining reduction after splitting).
- - insert_split_dimension: the dimension in the temporary tensor into
+ - insert_split_dimension: the dimension in the temporary tensor into
which the new parallel dimension is inserted.
- inner_parallel: specifies whether the parallel dimension is before or
after the reduction dimension in the splitting op.
- - use_scaling_algorithm: whether to use a scaling based formulation that
+ - use_scaling_algorithm: whether to use a scaling based formulation that
does not create an ExpandShapeOp (default: do not use scaling)
- - use_alloc: whether to use an alloc op to allocate the temporary
+ - use_alloc: whether to use an alloc op to allocate the temporary
tensor (default: do not use alloc op)
#### Return modes
-
+
This operation ignores non-Linalg ops and drops them in the return.
This operation produces `definiteFailure` if the splitting fails for any
reason.
If all the operations referred to by the `target` PDLOperation split
properly, the transform succeeds. Otherwise the transform silently fails.
- The 4 returned handles points to only the subset of successfully produced
+ The 4 returned handles point to only the subset of successfully produced
computational operations, which can all be empty.
These 4 returned handles point to:
- - the init op (or tensor_alloc op if use_alloc = true),
- - the fill op used to initialize the neutral element,
- - the split op and
+ - the init op (or tensor_alloc op if use_alloc = true),
+ - the fill op used to initialize the neutral element,
+ - the split op and
- the result-combining op.
#### Example (default: `use_scaling_algorithm = false, use_alloc = false`):
-
+
```
%r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
affine_map<(d0) -> ()>],
linalg.yield %y : f32
} -> tensor<f32>
```
-
+
is split into:
-
+
```
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
```
#### Example (`use_scaling_algorithm = true, use_alloc = true`):
-
- Instead of introducing an ExpandShapeOp, this scaling-based implementation
+
+ Instead of introducing an ExpandShapeOp, this scaling-based implementation
rewrites a reduction dimension `k` into `k * split_factor + kk`.
- The dimension `kk` is added as an extra parallel dimension to the
+ The dimension `kk` is added as an extra parallel dimension to the
intermediate output tensor at position `insert_split_dimension`.
- Consider a minimal example where `k` is reduced:
+ Consider a minimal example where `k` is reduced:
O(i, j) += I(i, j, k)
Assume i=3, j=5, k=128, split_factor=16 and insert_split_dimension=0.
- The compute is rewritten as:
+ The compute is rewritten as:
a. O_i(kk, i, j) += I(i, j, 16 * k + kk)
b. O(i, j) += O_i(kk, i, j)
The intermediate tensor O_i is of shape (128/16)x3x5 == 8x3x5.
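Putting the attributes together, invoking this transform might look like
(a sketch; the op mnemonic and result names are assumptions, the attribute
names are taken from the list above):

```mlir
%init, %fill, %split, %combine = transform.structured.split_reduction %0
    { split_factor = 16, insert_split_dimension = 0,
      use_scaling_algorithm, use_alloc }
```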
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::linalg::LinalgOp target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::linalg::LinalgOp target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
[FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
TransformEachOpTrait, TransformOpInterface]> {
let description = [{
- Indicates that the given `target` op should be transformed with the
+ Indicates that the given `target` op should be transformed with the
`tileReduction` transformation with the tile size provided as attribute.
-
+
This transformation tiles the `target` along the reduction dimensions. It
creates a tensor initialized with the identity value. Then it creates nested
loops with a parallel version of `target` op inside. The parallel op
#### Return modes
These 3 returned handles point to:
- - the fill op used to initialize the neutral element,
- - the parallel tiled op and
+ - the fill op used to initialize the neutral element,
+ - the parallel tiled op and
- the result-combining op.
#### Example:
-
+
```
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
} -> tensor<?xf32>
return %red : tensor<?xf32>
```
-
+
is transformed into:
-
+
```
%0 = tensor.empty(%dim_1) : tensor<?x5xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x5xf32>) -> tensor<?x5xf32>
%2 = scf.for %arg2 = %c0 to %dim_0 step %c5 iter_args(%arg3 = %1) -> (tensor<?x5xf32>) {
%extracted_slice = tensor.extract_slice %1[0, 0] [%dim, 5] [1, 1] : tensor<?x5xf32> to tensor<?x5xf32>
%extracted_slice_2 = tensor.extract_slice %arg0[0, %arg2] [%dim, 5] [1, 1] : tensor<?x?xf32> to tensor<?x5xf32>
- %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%extracted_slice_2 : tensor<?x5xf32>)
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::linalg::LinalgOp target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::linalg::LinalgOp target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
-def TileReductionUsingForeachThreadOp :
+def TileReductionUsingForeachThreadOp :
Op<Transform_Dialect, "structured.tile_reduction_using_foreach_thread",
[FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
TransformEachOpTrait, TransformOpInterface]> {
All the partial reduction values are inserted in parallel to create a new
tensor. After the loop a merge operation is created to do a final reduction
with the partial reductions tensor.
-
+
#### Return modes
These 3 returned handles point to:
- - the fill op used to initialize the neutral element,
- - the parallel tiled op and
+ - the fill op used to initialize the neutral element,
+ - the parallel tiled op and
- the result-combining op.
#### Example:
-
+
```
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
} -> tensor<?xf32>
return %red : tensor<?xf32>
```
-
+
is transformed into:
-
+
```
%0 = tensor.empty(%dim_1) : tensor<?x5xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x5xf32>) -> tensor<?x5xf32>
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::linalg::LinalgOp target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::linalg::LinalgOp target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
thought of as tiling by the full size of data.
It is the user's responsibility to ensure that `num_threads/tile_sizes` is
- a valid tiling specification (i.e. that only tiles parallel dimensions,
+ a valid tiling specification (i.e. that only tiles parallel dimensions,
e.g. in the Linalg case).
If non-empty, the `mapping` is added as an attribute to the
If all the operations referred to by the `target` PDLOperation tile
successfully, the transform succeeds.
Otherwise the transform silently fails.
-
+
The two returned handles point to only the subset of successfully produced
tiled operations, which can all be empty.
-
+
These two returned handles point to:
- the new scf.foreach_thread op,
- the tiled op that implements TilingInterface.
let builders = [
OpBuilder<(ins "Value":$target,
"ArrayRef<int64_t>":$staticTileSizes,
- CArg<"::mlir::transform::TileSizesSpec",
+ CArg<"::mlir::transform::TileSizesSpec",
"::mlir::transform::TileSizesSpec()">,
CArg<"ArrayAttr", "{}">:$mapping)>,
OpBuilder<(ins "Value":$target,
"ArrayRef<OpFoldResult>":$mixedTileSizes,
- CArg<"::mlir::transform::TileSizesSpec",
+ CArg<"::mlir::transform::TileSizesSpec",
"::mlir::transform::TileSizesSpec()">,
CArg<"ArrayAttr", "{}">:$mapping)>,
OpBuilder<(ins "Value":$target,
"ArrayRef<int64_t>":$staticNumThreads,
- CArg<"::mlir::transform::NumThreadsSpec",
+ CArg<"::mlir::transform::NumThreadsSpec",
"::mlir::transform::NumThreadsSpec()">,
CArg<"ArrayAttr", "{}">:$mapping)>,
OpBuilder<(ins "Value":$target,
"ArrayRef<OpFoldResult>":$mixedNumThreads,
- CArg<"::mlir::transform::NumThreadsSpec",
+ CArg<"::mlir::transform::NumThreadsSpec",
"::mlir::transform::NumThreadsSpec()">,
CArg<"ArrayAttr", "{}">:$mapping)>
];
operation that is contained inside the vectorization target.
This transformation supports the following attributes:
- - `vectorize_padding`: a UnitAttr to activate the vectorization of
- `tensor.pad` ops. Different pipelines may prefer to lower such ops to
+ - `vectorize_padding`: a UnitAttr to activate the vectorization of
+ `tensor.pad` ops. Different pipelines may prefer to lower such ops to
loops.
- `disable_multi_reduction_to_contract_patterns`: a UnitAttr to deactivate
- the rewrite of `vector.multi_reduction` to `vector.contract`. This is
+ the rewrite of `vector.multi_reduction` to `vector.contract`. This is
intended to be used in tests only.
- `disable_transfer_permutation_map_lowering_patterns`: a UnitAttr to
deactivate the rewrite of `vector.transfer` with permutation maps into
tests only but may be promoted to a first-class attribute in the future.
#### Return modes:
-
+
This operation produces `definiteFailure` if vectorization fails for any
reason.
- The operation always returns the handle to the target op that is expected
+ The operation always returns the handle to the target op that is expected
to be isolated from above.
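For example (a sketch, assuming the op mnemonic `structured.vectorize`; the
unit attribute is one of those listed above):

```mlir
%1 = transform.structured.vectorize %0 { vectorize_padding }
```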
}];
let arguments = (ins PDL_Operation:$target,
- UnitAttr:$vectorize_padding,
- UnitAttr:$disable_multi_reduction_to_contract_patterns,
+ UnitAttr:$vectorize_padding,
+ UnitAttr:$disable_multi_reduction_to_contract_patterns,
UnitAttr:$disable_transfer_permutation_map_lowering_patterns);
let results = (outs PDL_Operation:$transformed);
];
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::Operation *target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::Operation *target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
let name = "nvgpu";
let cppNamespace = "::mlir::nvgpu";
let description = [{
- The `NVGPU` dialect provides a bridge between higher-level target-agnostic
- dialects (GPU and Vector) and the lower-level target-specific dialect
- (LLVM IR based NVVM dialect) for NVIDIA GPUs. This allow representing PTX
- specific operations while using MLIR high level dialects such as Memref
+ The `NVGPU` dialect provides a bridge between higher-level target-agnostic
+ dialects (GPU and Vector) and the lower-level target-specific dialect
+ (LLVM IR based NVVM dialect) for NVIDIA GPUs. This allows representing PTX
+ specific operations while using MLIR high level dialects such as Memref
and Vector for memory and target-specific register operands, respectively.
}];
let mnemonic = typeMnemonic;
}
-def NVGPU_DeviceAsyncToken : NVGPU_Type<"DeviceAsyncToken",
+def NVGPU_DeviceAsyncToken : NVGPU_Type<"DeviceAsyncToken",
"device.async.token", []> {
let summary = "device async token type";
let description = [{
def NVGPU_LdMatrixOp : NVGPU_Op<"ldmatrix", [
MemoryEffects<[MemRead]>,
- PredOpTrait<"srcMemref and res have same element type",
+ PredOpTrait<"srcMemref and res have same element type",
TCresVTEtIsSameAsOp<0, 0>>]> {
let description = [{
The `nvgpu.ldmatrix` op represents loading a matrix fragment from
- memory to registers. The source and result type must be compatible
+ memory to registers. The source and result type must be compatible
with lowering to the `nvvm.ldmatrix` instruction. This op represents
the distributed version of a `vector.transfer_read` as an intermediate
step between lowering from `vector.transfer_read` to `nvvm.ldmatrix`.
class NVGPU_MmaSyncOp<string mnemonic> :
NVGPU_Op<mnemonic, [Pure,
- PredOpTrait<"matrixA and matrixB have same element type",
+ PredOpTrait<"matrixA and matrixB have same element type",
TCopVTEtIsSameAs<0, 1>>]> {
code extraBaseClassDeclaration = [{
- std::array<int64_t, 3> getMmaShapeAsArray() {
+ std::array<int64_t, 3> getMmaShapeAsArray() {
ArrayAttr mmaShape = this->getMmaShape();
assert(mmaShape.size() == 3 && "mmaShape should be three integers");
return {mmaShape[0].cast<IntegerAttr>().getInt(),
def NVGPU_MmaSyncOp : NVGPU_MmaSyncOp<"mma.sync"> {
let description = [{
The `nvgpu.mma.sync` op represents the warp-level matrix-multiply-and-
- accumulate (mma) operation that is compatible with `nvvm.mma.sync`.
- The operands and results vector sizes are thread-level onwership to
- the warp-level mma operation shape. `mmaShape` attribute holds the
+ accumulate (mma) operation that is compatible with `nvvm.mma.sync`.
+ The operand and result vector sizes reflect each thread's ownership of
+ the warp-level mma operation shape. The `mmaShape` attribute holds the
warp-level matrix-multiply shape.
-
- The `nvgpu.mma.sync` op serves as an intermediate point between lowering from
+
+ The `nvgpu.mma.sync` op serves as an intermediate point between lowering from
`vector.contract` to `nvvm.mma.sync`.
This operation is meant to follow the semantics described here:
Example:
```mlir
- %res = nvgpu.mma.sync (%matrixA, %matrixB, %matrixC) {mmaShape = [16, 8, 16]} :
+ %res = nvgpu.mma.sync (%matrixA, %matrixB, %matrixC) {mmaShape = [16, 8, 16]} :
(vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf32>) -> vector<2x2xf32>
```
}];
- let arguments = (ins AnyVector:$matrixA,
+ let arguments = (ins AnyVector:$matrixA,
AnyVector:$matrixB,
- AnyVector:$matrixC,
+ AnyVector:$matrixC,
I64ArrayAttr:$mmaShape,
OptionalAttr<UnitAttr>:$tf32Enabled
);
let results = (outs AnyVector:$res);
let builders = [
- OpBuilder<(ins "Value":$matrixA,
- "Value":$matrixB,
- "Value":$matrixC,
+ OpBuilder<(ins "Value":$matrixA,
+ "Value":$matrixB,
+ "Value":$matrixC,
"ArrayAttr":$mmaShape)>
];
Example (targeting the f16 16x8x32 `mma.sp` PTX instruction):
- ```mlir
+ ```mlir
nvgpu.mma.sp.sync (%a, %b, %c) metadata (%meta) {mmaShape = [16, 8, 32]} :
- (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
- ```
+ (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
+ ```
}];
- let arguments = (ins AnyVector:$matrixA,
+ let arguments = (ins AnyVector:$matrixA,
AnyVector:$matrixB,
AnyVector:$matrixC,
NVGPU_MmaSparseSyncMetadataType:$sparseMetadata,
let summary = "device-side asynchronous copy";
let description = [{
The `nvgpu.device_async_copy` op initiates an asynchronous copy operation of
- elements from source (global memory) to the destination (shared memory)
+ elements from source (global memory) to the destination (shared memory)
without blocking the thread. The async copy is added to a group.
This op is meant to be used with `nvgpu.device_async_create_group` and
`nvgpu.device_async_wait` to synchronize copies as explained in those ops
- descriptions.
-
- `bypassL1` attribute is hint to the hardware to bypass the L1 cache during
- async copy, this hint may be ignored by the hardware.
-
- `dstElements` attribute is the total number of elements written to
+ descriptions.
+
+ The `bypassL1` attribute is a hint to the hardware to bypass the L1 cache
+ during the async copy; this hint may be ignored by the hardware.
+
+ The `dstElements` attribute is the total number of elements written to the
destination (shared memory).
- `srcElements` argument is the total number of elements read from
+ The `srcElements` argument is the total number of elements read from the
source (global memory).
-
- `srcElements` is an optional argument and when present the op only reads
- `srcElements` number of elements from the source (global memory) and zero fills
+
+ `srcElements` is an optional argument; when present, the op only reads
+ `srcElements` elements from the source (global memory) and zero-fills
the rest of the elements in the destination (shared memory).
In order to do a copy and wait for the result, we need the following combination of ops:
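(a sketch; the concrete indices, shapes and the shared-memory address space
`3` are illustrative assumptions):

```mlir
// Issue two asynchronous copies from global to shared memory.
%cp0 = nvgpu.device_async_copy %src[%c0, %c0], %dst[%c0, %c0, %c0], 4 :
  memref<4x5xf32> to memref<2x7x5xf32, 3>
%cp1 = nvgpu.device_async_copy %src[%c1, %c0], %dst[%c1, %c0, %c0], 4 :
  memref<4x5xf32> to memref<2x7x5xf32, 3>
// Group the copies and wait for the whole group to complete.
%token = nvgpu.device_async_create_group %cp0, %cp1
nvgpu.device_async_wait %token
```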
// Construct a range of types.
%typeRange = pdl.range %inputType, %inputRange : !pdl.type, !pdl.range<type>
-
+
// Construct an empty range of types.
%valueRange = pdl.range : !pdl.range<type>
```
// Construct a range of types.
%typeRange = pdl_interp.create_range %inputType, %inputRange : !pdl.type, !pdl.range<type>
-
+
// Construct an empty range of types.
%valueRange = pdl_interp.create_range : !pdl.range<type>
```
let cppNamespace = "::mlir";
let description = [{
Attribute interface describing how to map a region to a processing unit.
-
- It is intended to be a generic mechanism for binding regions to execution
- units of an actual or virtual device. Each device first expresses its own
- mappings, and those mappings must implement this interface. These mappings
- can be used by the device-specific code generators and the desired regions
+
+ It is intended to be a generic mechanism for binding regions to execution
+ units of an actual or virtual device. Each device first expresses its own
+ mappings, and those mappings must implement this interface. These mappings
+ can be used by the device-specific code generators and the desired regions
can be connected to the given processing unit.
-
- Currently, `scf.foreach_thread` uses this interface to express the mapping
- of the loops it contains to the GPU's parallelism units such as threads and
+
+ Currently, `scf.foreach_thread` uses this interface to express the mapping
+ of the loops it contains to the GPU's parallelism units such as threads and
thread blocks.
}];
];
}
-def DeviceMappingArrayAttr :
- TypedArrayAttrBase<DeviceMappingAttrInterface,
+def DeviceMappingArrayAttr :
+ TypedArrayAttrBase<DeviceMappingAttrInterface,
"Device Mapping array attribute"> { }
#endif // MLIR_DEVICEMAPPINGINTERFACE
An optional `mapping` is an attribute array that specifies processing units
with their dimension, how it remaps 1-1 to a set of concrete processing
element resources (e.g. a CUDA grid dimension or a level of concrete nested
- async parallelism). It is expressed via any attribute that implements the
- device mapping interface. It is the reponsibility of the lowering mechanism
- to interpret the `mapping` attributes in the context of the concrete target
- the op is lowered to, or to ignore it when the specification is ill-formed
+ async parallelism). It is expressed via any attribute that implements the
+ device mapping interface. It is the responsibility of the lowering mechanism
+ to interpret the `mapping` attributes in the context of the concrete target
+ the op is lowered to, or to ignore it when the specification is ill-formed
or unsupported for a particular target.
The only allowed terminator is `scf.foreach_thread.perform_concurrently`.
```mlir
//
- // Sequential context. Here `mapping` is expressed as GPU thread mapping
+ // Sequential context. Here `mapping` is expressed as GPU thread mapping
// attributes
//
%matmul_and_pointwise:2 = scf.foreach_thread (%thread_id_1, %thread_id_2) in
//===----------------------------------------------------------------------===//
def Tensor_CastOp : Tensor_Op<"cast", [
- DeclareOpInterfaceMethods<CastOpInterface>,
+ DeclareOpInterfaceMethods<CastOpInterface>,
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
Pure
]> {
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
AttrSizedOperandSegments,
- Pure,
+ Pure,
OffsetSizeAndStrideOpInterface
]> {
let summary = "extract slice operation";
// Build an ExtractSliceOp with mixed static and dynamic entries packed in
// a Range vector.
OpBuilder<(ins "Value":$source, "ArrayRef<Range>":$ranges,
- CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+ CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
];
let extraClassDeclaration = extraBaseClassDeclaration # [{
tensor at the given indices.
In its most general form, the tensor of indices specifies all the coordinates
- of every element to extract (i.e. COO format, without the payload).
+ of every element to extract (i.e. COO format, without the payload).
The indices are expected to be confined to coordinate values that fit the
range of the `source` tensor, otherwise the behavior is undefined.
The leading dimensions of the index tensor give the result tensor its leading
- dimensions. The trailing dimensions of the result tensor are obtained from
- the source tensor by omitting the dimensions specified in `gather_dims`
+ dimensions. The trailing dimensions of the result tensor are obtained from
+ the source tensor by omitting the dimensions specified in `gather_dims`
(rank-reducing semantics) or setting them to `1` (rank-preserving semantics)
(see examples).
The trailing dimension of the index tensor contains the coordinates and is
expected to have its size equal to the number of dimensions being gathered.
This convention allows an idiomatic specification and lowering of "gathering
- multiple N-D slices from the source tensor".
+ multiple N-D slices from the source tensor".
Note: in the examples below, we separate out the indexing part of the tensor
type by a whitespace for readability purposes.
Example:
```mlir
- // For each 1x2 triple of coordinates in %indices, extract the
+ // For each 1x2 triple of coordinates in %indices, extract the
// element (i.e. 0-D subset) at the coordinates triple in %source.
//
%out = tensor.gather %source[%indices] gather_dims([0, 1, 2]) :
// slice %source[*, %indices[...]:%indices[...] + 1, *] with the indices
// corresponding to the `gather_dims` attribute specified by %indices.
//
- %out = tensor.gather %source[%indices] gather_dims([1]) :
+ %out = tensor.gather %source[%indices] gather_dims([1]) :
(tensor<3x4x5xf32>, tensor<6x7x 1xindex>) -> tensor<6x7x 3x1x5xf32>
// Note: result type may be further rank-reduced to tensor<6x7x 3x5xf32>.
```
The dimensions specified in the gather_dims attribute are ones for which the
- result tensor has size `1`.
+ result tensor has size `1`.
I.e. if the source type is `axbxcxd` and the coordinates are [1, 3], then
the shape suffix is `ax1xcx1`.
Gather also allows rank-reducing semantics where the shape `ax1xcx1` can be
further simplified to `axc`.
- The elemental type of the indices tensor can be any integer type.
+ The elemental type of the indices tensor can be any integer type.
In the absence of target-specific or problem specific information the default
type one should use is `index`.
Incorrectly setting the `unique` attribute when the coordinates are not truly
unique is undefined behavior.
- Only full slices are meant to be supported by this op, if one desires
+ Only full slices are meant to be supported by this op; if one desires
partial slices (e.g. strided windows) one should compose this op with other
tensor ops (e.g. tensor.extract_slice). This is to avoid a slippery slope of
complexity that would make the op unusable in practice.
- At the tensor-level, the index tensor is specified in an AoS form (i.e.
- coordinate tuple is the most minor). It is the responsibility of further
+ At the tensor-level, the index tensor is specified in an AoS form (i.e.
+ coordinate tuple is the most minor). It is the responsibility of further
lowerings and bufferization to implement various concrete layouts.
Note: As currently specified, the operation must lower to an abstraction that
performs copies to the output tensor. This is because the buffer type system
- is currently not rich enough to allow multiple non-contiguous views in the
+ is currently not rich enough to allow multiple non-contiguous views in the
same type. This is visible more clearly in a notional buffer version of the
op:
```mlir
// memref<?x4x1xf32> is a contiguous buffer of ?x4x1 elements.
// gather from random source slices must copy to the contiguous output.
- %out = memref.gather %source[%indices] gather_dims([1]) :
+ %out = memref.gather %source[%indices] gather_dims([1]) :
(memref<4x4xf32>, memref<?x 1xindex>) -> memref<?x 4x1xf32>
- // Nested buffer support would allow gather to directly index into the
+ // Nested buffer support would allow gather to directly index into the
// source buffer (i.e. represent a jagged view into the source).
- %out = memref.gather %source[%indices] gather_dims([1]) :
+ %out = memref.gather %source[%indices] gather_dims([1]) :
(memref<4x4xf32>, memref<?x 1xindex>) -> memref<? x memref<4x1xf32>>
```
}];
- let arguments = (ins AnyRankedTensor:$source,
+ let arguments = (ins AnyRankedTensor:$source,
RankedTensorOf<[AnySignlessIntegerOrIndex]>:$indices,
DenseI64ArrayAttr:$gather_dims,
UnitAttr:$unique);
let results = (outs AnyRankedTensor:$result);
let assemblyFormat = [{
- $source `[` $indices `]`
+ $source `[` $indices `]`
`gather_dims` `(` $gather_dims `)`
- (`unique` $unique^)?
+ (`unique` $unique^)?
attr-dict
`:` functional-type(operands, results)
}];
let extraClassDeclaration = [{
- // TODO: InferTypeOpInterface once enough confidence is built with
+ // TODO: InferTypeOpInterface once enough confidence is built with
// tensor<tensor> and its lowering to memref<memref>.
static RankedTensorType inferResultType(RankedTensorType sourceType,
RankedTensorType indicesType,
def Tensor_InsertSliceOp : Tensor_OpWithOffsetSizesAndStrides<"insert_slice", [
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
- AttrSizedOperandSegments,
+ AttrSizedOperandSegments,
DestinationStyleOpInterface,
- Pure,
+ Pure,
OffsetSizeAndStrideOpInterface,
TypesMatchWith<"expected result type to match dest type",
"dest", "result", "$_self">
def Tensor_PadOp : Tensor_Op<"pad", [
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
- AttrSizedOperandSegments,
+ AttrSizedOperandSegments,
Pure,
SingleBlockImplicitTerminator<"mlir::tensor::YieldOp">]> {
let summary = "tensor pad operation";
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
Pure
]> {
- let summary =
+ let summary =
"scatter a tensor into a destination tensor at specified indices";
let description = [{
The `scatter` operation inserts a `source` tensor into a `dest` tensor at
The indices are expected to be confined to coordinate values that fit the
range of the `dest` tensor, otherwise the behavior is undefined.
- The leading dimensions of the index tensor must match that of the dest
+ The leading dimensions of the index tensor must match that of the dest
tensor. The trailing dimensions of the dest tensor must match those of the
- source tensor by omitting the dimensions specified in scatter_dims
+ source tensor by omitting the dimensions specified in scatter_dims
(rank-reducing semantics) or setting them to `1` (rank-preserving semantics)
- (see examples).
- This convention allows an idiomatic specification and lowering of
- "scattering multiple N-D slices into the dest tensor".
+ (see examples).
+ This convention allows an idiomatic specification and lowering of
+ "scattering multiple N-D slices into the dest tensor".
The result type must match the type of the dest tensor.
Note: in the examples below, we separate out the indexing part of the tensor
Example:
```mlir
- // For each 1x2 triple of coordinates in %indices, insert the
+ // For each 1x2 triple of coordinates in %indices, insert the
// element (i.e. 0-D subset) at the coordinates triple in %dest.
//
%out = tensor.scatter %source into %dest[%indices]
// indices corresponding to the scatter_dims attribute specified by
// %indices.
//
- %out = tensor.scatter %source into %dest[%indices] scatter_dims([1]) unique :
+ %out = tensor.scatter %source into %dest[%indices] scatter_dims([1]) unique :
(tensor<3x 4x1x6xf32>, tensor<4x5x6xf32>, tensor<3x 1xindex>)
-> tensor<4x5x6xf32>
```
The dimensions specified in the scatter_dims attribute are ones for which the
- source tensor has size `1`.
+ source tensor has size `1`.
I.e. if the dest type is `axbxcxd` and the coordinates are [1, 3], then
the source type suffix is `ax1xcx1`.
Scatter also allows rank-reducing semantics where the shape `ax1xcx1` can be
further simplified to `axc`.
- The elemental type of the indices tensor can be any integer type.
+ The elemental type of the indices tensor can be any integer type.
In the absence of target-specific or problem specific information the default
type one should use is `index`.
coordinates are statically guaranteed to be unique at runtime. If coordinates
are not truly unique at runtime, the behavior is undefined.
- Only full slices are meant to be supported by this op, if one desires
+ Only full slices are meant to be supported by this op; if one desires
partial slices (e.g. strided windows) one should compose this op with other
tensor ops (e.g. tensor.insert_slice). This is to avoid a slippery slope of
complexity that would make the op unusable in practice.
- At the tensor-level, the index tensor is specified in an AoS form (i.e.
- coordinate tuple is the most minor). It is the responsibility of further
+ At the tensor-level, the index tensor is specified in an AoS form (i.e.
+ coordinate tuple is the most minor). It is the responsibility of further
lowerings and bufferization to implement various concrete layouts.
Note: As currently specified, the operation must lower to an abstraction that
performs copies to the output tensor. This is because the buffer type system
- is currently not rich enough to allow multiple non-contiguous views in the
+ is currently not rich enough to allow multiple non-contiguous views in the
same type. This is visible more clearly in a notional buffer version of the
op:
// random dest slices must copy to the contiguous dest.
//
some_side_effecting_op_writing_into %source, ...: memref<3x 4xf32>
- memref.scatter %source into %dest[%indices] scatter_dims([1]) unique :
+ memref.scatter %source into %dest[%indices] scatter_dims([1]) unique :
(memref<3x 4xf32>, memref<?x 4xf32>, memref<?x 1xindex>)
// Nested buffer support in the producing op would allow writing directly
// into the dest buffer.
- %v = some_nested_buffer_view_op %dest[%indices] scatter_dims([1]) unique :
+ %v = some_nested_buffer_view_op %dest[%indices] scatter_dims([1]) unique :
memref<? x memref<4xf32>>
some_side_effecting_op_writing_into %v, ...: memref<? x memref<4xf32>>
```
}];
- let arguments = (ins AnyRankedTensor:$source,
- AnyRankedTensor:$dest,
+ let arguments = (ins AnyRankedTensor:$source,
+ AnyRankedTensor:$dest,
RankedTensorOf<[AnySignlessIntegerOrIndex]>:$indices,
DenseI64ArrayAttr:$scatter_dims,
UnitAttr:$unique);
let results = (outs AnyRankedTensor:$result);
let assemblyFormat = [{
- $source `into` $dest `[` $indices `]`
+ $source `into` $dest `[` $indices `]`
`scatter_dims` `(` $scatter_dims `)`
(`unique` $unique^)?
attr-dict
code commonExtraClassDeclaration = [{
int64_t getSourceRank() { return getSource().getType().getRank(); };
int64_t getDestRank() { return getDest().getType().getRank(); };
- RankedTensorType getSourceType() {
+ RankedTensorType getSourceType() {
return getSource().getType().cast<RankedTensorType>(); };
RankedTensorType getDestType() {
return getDest().getType().cast<RankedTensorType>(); };
- /// Return position for init operand. Init operand is `dest`.
+ /// Return position for init operand. Init operand is `dest`.
std::pair<int64_t, int64_t> getDpsInitsPositionRange() {
return {1, 2}; // `dest` operand
}
/// Interface method for ConditionallySpeculatable.
- Speculation::Speculatability getSpeculatability();
-
- /// Return a mapping from positions `inner_dims_pos` to their
+ Speculation::Speculatability getSpeculatability();
+
+ /// Return a mapping from positions `inner_dims_pos` to their
/// tile factors.
DenseMap<int64_t, OpFoldResult> getDimAndTileMapping();
-
+
/// Return the tile sizes as OpFoldResult.
SmallVector<OpFoldResult> getMixedTiles();
-
- /// Return the tile sizes as `int64_t`. If a tile size is dynamic
- /// a sentinel `kDynamic` is introduced at that position in
+
+ /// Return the tile sizes as `int64_t`. If a tile size is dynamic
+ /// a sentinel `kDynamic` is introduced at that position in
/// the returned vector.
SmallVector<int64_t> getStaticTiles();
}];
-
+
let hasVerifier = 1;
}
def Tensor_PackOp : Tensor_RelayoutOp<"pack", [
AttrSizedOperandSegments]> {
let summary = "tensor pack operation";
- let description = [{
+ let description = [{
The pack operation converts an input tensor to a higher-dimensional tensor
with a tiled and packed layout. The mandatory `inner_dims_pos` attribute
specifies a permutation for the original dimensions, while `inner_tiles` is the
tiling factor for each dimension. The optional attribute `outer_dims_perm`
specifies the order for the tiled data dimension, while the attribute
`padding_value` specifies a padding value at the boundary on non-perfectly
- divisible dimensions. Padding is optional:
- - If absent, it is UB if the tile does not perfectly divide the dimension.
- - If present, it will pad along high dimensions (high-padding) to make the
- tile complete.
+ divisible dimensions. Padding is optional:
+ - If absent, it is UB if the tile does not perfectly divide the dimension.
+ - If present, it will pad along high dimensions (high-padding) to make the
+ tile complete.
Example NC_to_NCnc:
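(a sketch consistent with the `tensor.unpack` example further below; a
128x256 tensor tiled with 8x32 inner tiles):

```mlir
%0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
    into %dest : tensor<128x256xf32> -> tensor<16x8x8x32xf32>
```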
DenseI64ArrayAttr:$static_inner_tiles);
let results = (outs AnyRankedTensor:$result);
let assemblyFormat = [{
- $source
+ $source
(`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)?
- (`outer_dims_perm` `=` $outer_dims_perm^)?
+ (`outer_dims_perm` `=` $outer_dims_perm^)?
`inner_dims_pos` `=` $inner_dims_pos
`inner_tiles` `=`
custom<DynamicIndexList>($inner_tiles, $static_inner_tiles)
let extraClassDeclaration = commonExtraClassDeclaration # [{
// Method to get the `ShapedType` of the result based on the inner tiles,
- // position of the inner tiles (innerDimsPos) and interchange vector of
+ // position of the inner tiles (innerDimsPos) and interchange vector of
// outer loops (outerDimsPerm).
static ShapedType inferPackedType(ShapedType sourceType,
ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
ArrayRef<int64_t> outerDimsPerm = {});
- }];
+ }];
}
//===----------------------------------------------------------------------===//
Example CK to KCck:
```mlir
- tensor.unapck %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
+ tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
inner_tiles = [8, 32] into %dest : tensor<8x16x8x32xf32> -> tensor<128x256xf32>
```
}];
// This quantization attribute expresses numerical behavior of operators where
// the operator has a numerical relationship between a single input and output.
// For example: tosa.negate.
-def Tosa_UnaryOpQuantizationAttr
+def Tosa_UnaryOpQuantizationAttr
: Tosa_Attr<"UnaryOpQuantization", "unary_quant"> {
let summary = "Attribute for UnaryOp quantization information.";
let parameters = (ins "int64_t":$input_zp, "int64_t":$output_zp);
// the inputs.
// The scaling of their accumulator output is done using an explicit
// tosa.rescale operator that scales the accumulator result to output scale.
-def Tosa_ConvOpQuantizationAttr
- : Tosa_Attr<"ConvOpQuantization", "conv_quant"> {
+def Tosa_ConvOpQuantizationAttr
+ : Tosa_Attr<"ConvOpQuantization", "conv_quant"> {
let summary = "Attribute for Conv type op quantization information.";
let parameters = (ins "int64_t":$input_zp, "int64_t":$weight_zp);
let assemblyFormat = "`<` struct(params) `>`";
}
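// With the `<` struct(params) `>` assembly format above, such an attribute is
// spelled, e.g. (a sketch): #tosa.conv_quant<input_zp = 0, weight_zp = 0>.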
-def Tosa_MatMulOpQuantizationAttr
+def Tosa_MatMulOpQuantizationAttr
: Tosa_Attr< "MatMulOpQuantization", "matmul_quant"> {
let summary = "Attribute for MatMulOp quantization information.";
let parameters = (ins "int64_t":$a_zp, "int64_t":$b_zp);
//
//===----------------------------------------------------------------------===//
//
-// This file defines codegen utility operators for the TOSA dialect.
-// These operators are not part of the formal TOSA specification and
-// are intended to aid code generation from TOSA.
+// This file defines codegen utility operators for the TOSA dialect.
+// These operators are not part of the formal TOSA specification and
+// are intended to aid code generation from TOSA.
//
//===----------------------------------------------------------------------===//
: Pass<"tosa-optional-decompositions", "func::FuncOp"> {
let summary = "Applies Tosa operations optional decompositions";
let description = [{
- Pass to apply the Tosa operations decompositions
+ Pass to apply the optional TOSA operation decompositions
exposed as populate functions in include/mlir/Dialect/Tosa/Transforms/Passes.h
}];
remaining alternatives. Note that the payload IR outside of the given scope
is not necessarily in a valid state, or even accessible to the
transformation.
-
+
The changes to the IR within the scope performed by transforms in the failed
alternative region are reverted before attempting the next region.
Practically, this is achieved by cloning the scope. Therefore it is advised
let arguments = (ins TransformTypeInterface:$target);
let results = (outs Variadic<TransformTypeInterface>:$results);
let regions = (region SizedRegion<1>:$body);
- let assemblyFormat =
+ let assemblyFormat =
"$target `:` type($target) (`->` type($results)^)? $body attr-dict";
let hasVerifier = 1;
DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
let summary = "Splits handles from a union of payload ops to a list";
let description = [{
- Creates `num_result_handles` transform IR handles extracted from the
+ Creates `num_result_handles` transform IR handles extracted from the
`handle` operand. The resulting Payload IR operation handles are listed
in the same order as the operations appear in the source `handle`.
This is useful for ensuring a statically known number of operations are
that can be further manipulated in isolation.
This operation succeeds and returns `num_result_handles` if the statically
- specified `num_result_handles` corresponds to the dynamic number of
+ specified `num_result_handles` corresponds to the dynamic number of
operations contained in the source `handle`. Otherwise it silently fails.
}];
];
let assemblyFormat = [{
- $handle `in` `[` $num_result_handles `]`
+ $handle `in` `[` $num_result_handles `]`
attr-dict `:` functional-type(operands, results)
}];
}
}];
let arguments = (ins
- Arg<TransformTypeInterface, "Payload IR scope to match within">:$root,
+ Arg<TransformTypeInterface, "Payload IR scope to match within">:$root,
SymbolRefAttr:$pattern_name);
let results = (outs
Res<TransformTypeInterface, "Handle to the matched Payload IR ops">:$matched);
This transformation is useful to "align" the sizes of payload IR lists
before a transformation that expects, e.g., identically-sized lists. For
- example, a transformation may be parameterized by same notional per-target
+ example, a transformation may be parameterized by the same notional per-target
size computed at runtime and supplied as another handle, the replication
allows this size to be computed only once and used for every target instead
of replicating the computation itself.
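As an illustrative scenario (not taken from the definition itself): if one handle tracks a single op that computes a shared per-target size and another handle tracks N payload targets, replicating the first handle against the second yields a handle with N entries, so each target is paired with the size without duplicating the computation that produced it.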
any transformation on the payload IR and is used for flow purposes only.
}];
- let arguments = (ins
+ let arguments = (ins
Arg<Variadic<TransformTypeInterface>, "Operation handles yielded back to the parent",
[TransformMappingRead]>:$operands);
let assemblyFormat = "operands attr-dict (`:` type($operands)^)?";
affine form through explicit linearization:
```mlir
- affine_map<(d0, ... dN)[offset, stride0, ... strideN] ->
+ affine_map<(d0, ... dN)[offset, stride0, ... strideN] ->
(offset + d0 * stride0 + ... dN * strideN)>
```
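For instance, the rank-2 instantiation of the generic form above is the offset plus each index scaled by its stride:
```mlir
affine_map<(d0, d1)[offset, stride0, stride1] -> (offset + d0 * stride0 + d1 * stride1)>
```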
/// This parameter represents a handle to a resource that is encoded into the
/// "dialect_resources" section of the assembly format. This parameter expects a
/// C++ `handleType` that derives from `AsmDialectResourceHandleBase` and
-/// implements a derived handle to the desired resource type.
+/// implements a derived handle to the desired resource type.
class ResourceHandleParameter<string handleType, string desc = "">
: AttrOrTypeParameter<handleType, desc> {
let parser = "$_parser.parseResourceHandle<" # handleType # ">()";
let description = [{
An interface used to query and manipulate sub-elements, such as sub-types
and sub-attributes of a composite attribute.
-
+
To support the introspection of custom parameters that hold sub-elements,
a specialization of the `AttrTypeSubElementHandler` class must be provided.
}];
let description = [{
An interface used to query and manipulate sub-elements, such as sub-types
and sub-attributes of a composite type.
-
+
To support the introspection of custom parameters that hold sub-elements,
a specialization of the `AttrTypeSubElementHandler` class must be provided.
}];
`getSuccessorRegions`.
}],
"::mlir::OperandRange", "getSuccessorEntryOperands",
- (ins "::llvm::Optional<unsigned>":$index), [{}],
+ (ins "::llvm::Optional<unsigned>":$index), [{}],
/*defaultImplementation=*/[{
auto operandEnd = this->getOperation()->operand_end();
return ::mlir::OperandRange(operandEnd, operandEnd);
let description = [{
A parallel combining op is an op with a region.
- This is useful as a terminator to parallel operations that iterate over
- some set and return tensors while avoiding tight coupling between the
+ This is useful as a terminator for parallel operations that iterate over
+ some set and return tensors, while avoiding tight coupling between the
iterating op, the combining op, and the individual subtensor-producing ops.
}];
let cppNamespace = "::mlir";
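For orientation only (an illustration, not a requirement of the interface): in upstream MLIR, `scf.foreach_thread.perform_concurrently` is a terminator of this kind, with `tensor.parallel_insert_slice` ops inside it playing the role of the individual subtensor-producing ops, while the interface itself remains agnostic of those particular dialects.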
result required. This method enables fusion by using tile
and fuse. The method returns failure if the operation can't be
tiled to generate the result tile. In practical terms this
- implies it cannot be tiled and fused with its consumers.
+ implies it cannot be tiled and fused with its consumers.
- `offsets` provides the offset of the tile in the coordinate system
of the original iteration space, i.e., if an iteration space
>,
InterfaceMethod<
/*desc=*/[{
- Generates the scalar implementation of the operation.
+ Generates the scalar implementation of the operation.
Given the list `ivs` that represents a point in the iteration space
(as specified by `getIterationDomain()`), returns the scalar operations
that represent the computation at that point in the iteration space.
This method is typically used as the "exit path", i.e. once all
- transformations are done, this method can be used to lower to scalar
+ transformations are done, this method can be used to lower to scalar
code that can then be lowered to LLVM or SPIR-V dialects.
}],
/*retType=*/"LogicalResult",
return failure();
}]
>
- ];
+ ];
}
def PartialReductionOpInterface : OpInterface<"PartialReductionOpInterface"> {
// CHECK: from ^bb0 = live
// CHECK: ^bb2 = live
// CHECK: from ^bb1 = live
-func.func @test_cfg(%cond: i1) -> ()
+func.func @test_cfg(%cond: i1) -> ()
attributes {tag = "test_cfg"} {
cf.br ^bb1
// CHECK: test_if:
// CHECK: region #0
// CHECK: region_preds: (all) predecessors:
- // CHECK: scf.if
+ // CHECK: scf.if
// CHECK: region #1
// CHECK: region_preds: (all) predecessors:
- // CHECK: scf.if
+ // CHECK: scf.if
// CHECK: op_preds: (all) predecessors:
// CHECK: scf.yield {then}
// CHECK: scf.yield {else}
scf.yield {then}
} else {
scf.yield {else}
- } {tag = "test_if"}
+ } {tag = "test_if"}
// test_while:
// region #0
// region_preds: (all) predecessors:
- // scf.while
- // scf.yield
+ // scf.while
+ // scf.yield
// region #1
// region_preds: (all) predecessors:
// scf.condition
// CHECK: op_preds: (all) predecessors:
// CHECK: func.call @foo(%{{.*}}) {tag = "a"}
// CHECK: func.call @foo(%{{.*}}) {tag = "b"}
-func.func private @foo(%arg0: i32) -> i32
+func.func private @foo(%arg0: i32) -> i32
attributes {tag = "foo"} {
- return {a} %arg0 : i32
+ return {a} %arg0 : i32
}
// CHECK: bar:
// CHECK: ^bb0 = live
// CHECK: op_preds: predecessors:
// CHECK: func.call @bar(%{{.*}}) {tag = "c"}
-func.func @bar(%cond: i1) -> i32
+func.func @bar(%cond: i1) -> i32
attributes {tag = "bar"} {
cf.cond_br %cond, ^bb1, ^bb2
// CHECK: from ^bb0 = live
// CHECK: ^bb2 = live
// CHECK: from ^bb0 = live
-func.func @test_unknown_branch() -> ()
+func.func @test_unknown_branch() -> ()
attributes {tag = "test_unknown_branch"} {
"test.unknown_br"() [^bb1, ^bb2] : () -> ()
"test.unknown_region_br"() ({
^bb0:
"test.unknown_region_end"() : () -> ()
- }, {
+ }, {
^bb0:
"test.unknown_region_end"() : () -> ()
}) {tag = "test_unknown_region"} : () -> ()
// CHECK: ^bb0 = live
// CHECK: ^bb1 = live
// CHECK: ^bb2 = dead
-func.func @test_known_dead_block() -> ()
+func.func @test_known_dead_block() -> ()
attributes {tag = "test_known_dead_block"} {
%true = arith.constant true
cf.cond_br %true, ^bb1, ^bb2
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
- %true = arith.constant true
+ %true = arith.constant true
cf.cond_br %true, ^bb3, ^bb2
^bb2:
// CHECK: region #1
// CHECK: ^bb0 = live
// CHECK: region_preds: (all) predecessors:
- // CHECK: scf.if
+ // CHECK: scf.if
// CHECK: op_preds: (all) predecessors:
// CHECK: scf.yield {else}
scf.if %false {
scf.yield {then}
} else {
scf.yield {else}
- } {tag = "test_known_if"}
+ } {tag = "test_known_if"}
return
}
%true = arith.constant true
scf.if %true {
func.call @callable() {then} : () -> ()
- scf.yield
+ scf.yield
} else {
func.call @callable() {else} : () -> ()
- scf.yield
+ scf.yield
}
return
}
func.func @test_default_init() -> () {
// CHECK: a -> 0
"test.foo"() {tag = "a"} : () -> ()
- return
+ return
}
// -----
"test.foo"() {tag = "a"} : () -> ()
// CHECK: b -> 1
"test.foo"() {tag = "b", foo = 1 : ui64} : () -> ()
- return
+ return
}
// -----
"test.foo"() {tag = "b", foo = 1 : ui64} : () -> ()
// CHECK: c -> 0
"test.foo"() {tag = "c", foo = 1 : ui64} : () -> ()
- return
+ return
}
// -----
// CHECK: %[[ONE_HALF:.*]] = arith.constant 5.000000e-01 : f32
// CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32
// CHECK: %[[TWO:.*]] = arith.constant 2.000000e+00 : f32
-// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] : f32
+// CHECK: %[[SQ_SUM_0:.*]] = arith.mulf %[[REAL]], %[[REAL]] : f32
// CHECK: %[[TWO_REAL:.*]] = arith.mulf %[[REAL]], %[[TWO]] : f32
-// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] : f32
-// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] : f32
-// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] : f32
-// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] : f32
+// CHECK: %[[SQ_SUM_1:.*]] = arith.addf %[[SQ_SUM_0]], %[[TWO_REAL]] : f32
+// CHECK: %[[SQ_IMAG:.*]] = arith.mulf %[[IMAG]], %[[IMAG]] : f32
+// CHECK: %[[SQ_SUM_2:.*]] = arith.addf %[[SQ_SUM_1]], %[[SQ_IMAG]] : f32
+// CHECK: %[[LOG_SQ_SUM:.*]] = math.log1p %[[SQ_SUM_2]] : f32
// CHECK: %[[RESULT_REAL:.*]] = arith.mulf %[[LOG_SQ_SUM]], %[[ONE_HALF]] : f32
// CHECK: %[[REAL_PLUS_ONE:.*]] = arith.addf %[[REAL]], %[[ONE]] : f32
-// CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] : f32
+// CHECK: %[[RESULT_IMAG:.*]] = math.atan2 %[[IMAG]], %[[REAL_PLUS_ONE]] : f32
// CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex<f32>
// CHECK: return %[[RESULT]] : complex<f32>
func.func private @opaque_ptr() -> !llvm.ptr
// CHECK-LABEL: @ptr_ptr()
-// CHECK: !llvm.ptr<ptr<i42>>
+// CHECK: !llvm.ptr<ptr<i42>>
func.func private @ptr_ptr() -> !llvm.ptr<!llvm.ptr<!test.smpla>>
// CHECK-LABEL: @struct_ptr()
-// CHECK: !llvm.struct<(ptr<i42>)>
+// CHECK: !llvm.struct<(ptr<i42>)>
func.func private @struct_ptr() -> !llvm.struct<(ptr<!test.smpla>)>
// CHECK-LABEL: @named_struct_ptr()
func.func private @named_no_convert() -> !llvm.struct<"no_convert", (ptr<struct<"no_convert">>)>
// CHECK-LABEL: @array_ptr()
-// CHECK: !llvm.array<10 x ptr<i42>>
+// CHECK: !llvm.array<10 x ptr<i42>>
func.func private @array_ptr() -> !llvm.array<10 x ptr<!test.smpla>>
// CHECK-LABEL: @func()
// -----
// BAREPTR-LABEL: func @check_return(
-// BAREPTR-SAME: %{{.*}}: memref<?xi8>) -> memref<?xi8>
+// BAREPTR-SAME: %{{.*}}: memref<?xi8>) -> memref<?xi8>
func.func @check_return(%in : memref<?xi8>) -> memref<?xi8> {
// BAREPTR: llvm.return {{.*}} : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
return %in : memref<?xi8>
// CHECK-LABEL: @empty
func.func @empty() {
- // CHECK: llvm.intr.stacksave
+ // CHECK: llvm.intr.stacksave
// CHECK: llvm.br
memref.alloca_scope {
memref.alloca_scope.return
}
- // CHECK: llvm.intr.stackrestore
+ // CHECK: llvm.intr.stackrestore
// CHECK: llvm.br
return
}
// CHECK-LABEL: @returns_nothing
func.func @returns_nothing(%b: f32) {
%a = arith.constant 10.0 : f32
- // CHECK: llvm.intr.stacksave
+ // CHECK: llvm.intr.stacksave
memref.alloca_scope {
%c = arith.addf %a, %b : f32
memref.alloca_scope.return
}
- // CHECK: llvm.intr.stackrestore
+ // CHECK: llvm.intr.stackrestore
return
}
// CHECK-LABEL: @returns_one_value
func.func @returns_one_value(%b: f32) -> f32 {
%a = arith.constant 10.0 : f32
- // CHECK: llvm.intr.stacksave
+ // CHECK: llvm.intr.stacksave
%result = memref.alloca_scope -> f32 {
%c = arith.addf %a, %b : f32
memref.alloca_scope.return %c: f32
}
- // CHECK: llvm.intr.stackrestore
+ // CHECK: llvm.intr.stackrestore
return %result : f32
}
// CHECK-LABEL: @returns_multiple_values
func.func @returns_multiple_values(%b: f32) -> f32 {
%a = arith.constant 10.0 : f32
- // CHECK: llvm.intr.stacksave
+ // CHECK: llvm.intr.stacksave
%result1, %result2 = memref.alloca_scope -> (f32, f32) {
%c = arith.addf %a, %b : f32
%d = arith.subf %a, %b : f32
memref.alloca_scope.return %c, %d: f32, f32
}
- // CHECK: llvm.intr.stackrestore
+ // CHECK: llvm.intr.stackrestore
%result = arith.addf %result1, %result2 : f32
return %result : f32
}
// CHECK-NOT llvm.extractvalue
// CHECK: [[d:%.+]] = nvvm.mma.sync
// CHECK-SAME: shape = #nvvm.shape<m = 16, n = 8, k = 16>
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
- // CHECK-DAG: llvm.extractvalue [[d]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>)>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
+ // CHECK-DAG: llvm.extractvalue [[d]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>)>
// CHECK-DAG: llvm.extractvalue [[d]][1] : !llvm.struct<(vector<2xf16>, vector<2xf16>)>
// CHECK: llvm.mlir.undef : !llvm.array<2 x vector<2xf16>>
// CHECK-DAG: llvm.insertvalue {{%.+}}, {{%.+}}[0] : !llvm.array<2 x vector<2xf16>>
- // CHECK-DAG: llvm.insertvalue {{%.+}}, {{%.+}}[1] : !llvm.array<2 x vector<2xf16>>
+ // CHECK-DAG: llvm.insertvalue {{%.+}}, {{%.+}}[1] : !llvm.array<2 x vector<2xf16>>
return %d : vector<2x2xf16>
}
// CHECK: [[d:%.+]] = nvvm.mma.sync
// CHECK-SAME: shape = #nvvm.shape<m = 16, n = 8, k = 16>
// CHECK-SAME: (vector<2xf16>, vector<2xf16>, f32) -> !llvm.struct<(f32, f32, f32, f32)>
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf32>) -> vector<2x2xf32>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf32>) -> vector<2x2xf32>
// CHECK: [[undef:%.+]] = llvm.mlir.undef : vector<2xf32>
// CHECK-DAG: llvm.extractvalue [[d]][0] : !llvm.struct<(f32, f32, f32, f32)>
// CHECK-DAG: llvm.extractvalue [[d]][1] : !llvm.struct<(f32, f32, f32, f32)>
// CHECK: [[d00:%.+]] = llvm.insertelement {{%.+}}, [[undef]][{{.*}}] : vector<2xf32>
// CHECK: [[d01:%.+]] = llvm.insertelement {{%.+}}, [[d00]][{{.*}}] : vector<2xf32>
- // CHECK: [[undef:%.+]] = llvm.mlir.undef : vector<2xf32>
+ // CHECK: [[undef:%.+]] = llvm.mlir.undef : vector<2xf32>
// CHECK-DAG: llvm.extractvalue [[d]][2] : !llvm.struct<(f32, f32, f32, f32)>
// CHECK-DAG: llvm.extractvalue [[d]][3] : !llvm.struct<(f32, f32, f32, f32)>
// CHECK: [[d10:%.+]] = llvm.insertelement {{%.+}}, [[undef]][{{.*}}] : vector<2xf32>
// CHECK: [[d11:%.+]] = llvm.insertelement {{%.+}}, [[d10]][{{.*}}] : vector<2xf32>
-
+
// CHECK-DAG: llvm.insertvalue [[d01]], {{%.+}}[0] : !llvm.array<2 x vector<2xf32>>
- // CHECK-DAG: llvm.insertvalue [[d11]], {{%.+}}[1] : !llvm.array<2 x vector<2xf32>>
+ // CHECK-DAG: llvm.insertvalue [[d11]], {{%.+}}[1] : !llvm.array<2 x vector<2xf32>>
return %d : vector<2x2xf32>
}
// CHECK-NOT llvm.extractvalue
// CHECK: [[d:%.+]] = nvvm.mma.sync
// CHECK-SAME: shape = #nvvm.shape<m = 16, n = 8, k = 8>
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<2x2xf16>, vector<1x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
- // CHECK-DAG: llvm.extractvalue [[d]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>)>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<2x2xf16>, vector<1x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
+ // CHECK-DAG: llvm.extractvalue [[d]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>)>
// CHECK-DAG: llvm.extractvalue [[d]][1] : !llvm.struct<(vector<2xf16>, vector<2xf16>)>
// CHECK: llvm.mlir.undef : !llvm.array<2 x vector<2xf16>>
// CHECK-DAG: llvm.insertvalue {{%.+}}, {{%.+}}[0] : !llvm.array<2 x vector<2xf16>>
- // CHECK-DAG: llvm.insertvalue {{%.+}}, {{%.+}}[1] : !llvm.array<2 x vector<2xf16>>
+ // CHECK-DAG: llvm.insertvalue {{%.+}}, {{%.+}}[1] : !llvm.array<2 x vector<2xf16>>
// CHECK: return
return %d : vector<2x2xf16>
}
// CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<8xi4>>
// CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32
// CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<8xi4>>
- // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32
+ // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32
// CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<1 x vector<8xi4>>
- // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32
+ // CHECK: llvm.bitcast [[el]] : vector<8xi4> to i32
// CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>>
// CHECK: [[el:%.+]] = llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>>
// CHECK: [[d:%.+]] = nvvm.mma.sync
// CHECK-SAME: shape = #nvvm.shape<m = 8, n = 8, k = 4>
%d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [8, 8, 4]} : (vector<1x1xf64>, vector<1x1xf64>, vector<1x2xf64>) -> vector<1x2xf64>
// CHECK: llvm.mlir.undef : vector<2xf64>
- // CHECK-DAG: llvm.extractvalue [[d]][0] : !llvm.struct<(f64, f64)>
+ // CHECK-DAG: llvm.extractvalue [[d]][0] : !llvm.struct<(f64, f64)>
// CHECK-DAG: llvm.extractvalue [[d]][1] : !llvm.struct<(f64, f64)>
// CHECK-COUNT-2: llvm.insertelement {{.*}} : vector<2xf64>
// CHECK-DAG: llvm.insertvalue {{%.+}}, {{%.+}}[0] : !llvm.array<1 x vector<2xf64>>
func.func @ldmatrix_x1(%arg0: memref<128x128xf16, 3>) -> vector<1x2xf16> {
%c0 = arith.constant 0 : index
// CHECK: nvvm.ldmatrix {{%.+}} {layout = #nvvm.mma_layout<row>, num = 1 : i32} {{.*}} -> i32
- %a = nvgpu.ldmatrix %arg0[%c0, %c0] {transpose = false, numTiles = 1 : i32} : memref<128x128xf16, 3> -> vector<1x2xf16>
+ %a = nvgpu.ldmatrix %arg0[%c0, %c0] {transpose = false, numTiles = 1 : i32} : memref<128x128xf16, 3> -> vector<1x2xf16>
// CHECK: llvm.bitcast
- // CHECK: llvm.insertvalue
+ // CHECK: llvm.insertvalue
return %a : vector<1x2xf16>
}
// -----
// CHECK-LABEL: @m16n8k4_tf32
-func.func @m16n8k4_tf32(%arg0: vector<2x1xf32>, %arg1: vector<1x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
+func.func @m16n8k4_tf32(%arg0: vector<2x1xf32>, %arg1: vector<1x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
// The A and B operands should be bitcast to i32
// CHECK: llvm.extractvalue
- // CHECK: llvm.bitcast {{.*}} : vector<1xf32> to i32
+ // CHECK: llvm.bitcast {{.*}} : vector<1xf32> to i32
// CHECK: llvm.extractvalue
// CHECK: llvm.bitcast {{.*}} : vector<1xf32> to i32
// CHECK: llvm.extractvalue
// CHECK-SAME: multiplicandAPtxType = #nvvm.mma_type<tf32>
// CHECK-SAME: multiplicandBPtxType = #nvvm.mma_type<tf32>
// CHECK-SAME: shape = #nvvm.shape<m = 16, n = 8, k = 4>
- // CHECK-SAME: -> !llvm.struct<(f32, f32, f32, f32)>
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 4], tf32Enabled} : (vector<2x1xf32>, vector<1x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
+ // CHECK-SAME: -> !llvm.struct<(f32, f32, f32, f32)>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 4], tf32Enabled} : (vector<2x1xf32>, vector<1x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
// CHECK: [[undef:%.+]] = llvm.mlir.undef : vector<2xf32>
// CHECK-DAG: llvm.extractvalue [[d]][0] : !llvm.struct<(f32, f32, f32, f32)>
// CHECK-DAG: llvm.extractvalue [[d]][1] : !llvm.struct<(f32, f32, f32, f32)>
// CHECK: [[d00:%.+]] = llvm.insertelement {{%.+}}, [[undef]][{{.*}}] : vector<2xf32>
// CHECK: [[d01:%.+]] = llvm.insertelement {{%.+}}, [[d00]][{{.*}}] : vector<2xf32>
- // CHECK: [[undef:%.+]] = llvm.mlir.undef : vector<2xf32>
+ // CHECK: [[undef:%.+]] = llvm.mlir.undef : vector<2xf32>
// CHECK-DAG: llvm.extractvalue [[d]][2] : !llvm.struct<(f32, f32, f32, f32)>
// CHECK-DAG: llvm.extractvalue [[d]][3] : !llvm.struct<(f32, f32, f32, f32)>
// CHECK: [[d10:%.+]] = llvm.insertelement {{%.+}}, [[undef]][{{.*}}] : vector<2xf32>
// CHECK: [[d11:%.+]] = llvm.insertelement {{%.+}}, [[d10]][{{.*}}] : vector<2xf32>
-
+
// CHECK-DAG: llvm.insertvalue [[d01]], {{%.+}}[0] : !llvm.array<2 x vector<2xf32>>
- // CHECK-DAG: llvm.insertvalue [[d11]], {{%.+}}[1] : !llvm.array<2 x vector<2xf32>>
+ // CHECK-DAG: llvm.insertvalue [[d11]], {{%.+}}[1] : !llvm.array<2 x vector<2xf32>>
return %d : vector<2x2xf32>
}
// CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64
// CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i4, 3>, ptr<i4, 3>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(128 : index) : i64
- // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64
+ // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64
// CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[IDX1]] : i64
// CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI1]]] : (!llvm.ptr<i4, 3>, i64) -> !llvm.ptr<i4, 3>
// CHECK-DAG: %[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr<i4, 3> to !llvm.ptr<i8, 3>
// CHECK-SAME: "=r,=r,r,r,r,r,r,r,r,r,r,r,r,r"
// CHECK-SAME: %[[sparseMetadata]], %[[sparseSelector]] :
// CHECK-SAME: -> !llvm.struct<(vector<2xf16>, vector<2xf16>)>
-
+
%d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 32]} :
(vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
- // CHECK-DAG: llvm.extractvalue %[[d]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>)>
+ // CHECK-DAG: llvm.extractvalue %[[d]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>)>
// CHECK-DAG: llvm.extractvalue %[[d]][1] : !llvm.struct<(vector<2xf16>, vector<2xf16>)>
// CHECK: llvm.mlir.undef : !llvm.array<2 x vector<2xf16>>
// CHECK: llvm.insertvalue %{{.+}}, %{{.+}}[0] : !llvm.array<2 x vector<2xf16>>
%arg3: vector<2xi16>) -> vector<2x2xf16> {
// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>>
- // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>>
+ // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>>
// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>>
- // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>>
+ // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>>
// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>>
// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>>
// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<4xi8>>
// CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32
- // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<4 x vector<4xi8>>
+ // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<4 x vector<4xi8>>
// CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32
// CHECK: llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>>
// Combine hi-low into the final result.
// CHECK-DAG: %[[HIL:.+]] = arith.shli %[[FHI]], %[[HISHL]]
- // CHECK-DAG: %[[HIALIGN:.+]] = arith.shrsi %[[HIL:.+]], %[[HISHR]]
+ // CHECK-DAG: %[[HIALIGN:.+]] = arith.shrsi %[[HIL:.+]], %[[HISHR]]
// CHECK-DAG: %[[LOR:.+]] = arith.shrui %[[LADD]], %[[S32]]
// CHECK-DAG: %[[LOWALIGN:.+]] = arith.select %[[OVER31]], %[[C0]], %[[LOR]]
// CHECK-DAG: %[[RESULT:.+]] = arith.addi %[[LOWALIGN]], %[[HIALIGN]]
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
// CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
// CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
- // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32):
+ // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32):
// CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32
// CHECK: linalg.yield [[ADD]] : f32
// CHECK: } -> tensor<1x5x5x33xf32>
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>)
// CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
// CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) {
- // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32):
+ // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32):
// CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32
// CHECK: linalg.yield [[ADD]] : i32
// CHECK: } -> tensor<1x12x12x512xi32>
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>)
// CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
// CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) {
- // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32):
+ // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32):
// CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32
// CHECK: linalg.yield [[ADD]] : i32
// CHECK: } -> tensor<1x10x10x512xi32>
// CHECK: #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// CHECK-LABEL: @broadcast_resize_nearest_fp
func.func @broadcast_resize_nearest_fp(%arg0 : tensor<3x1x1x7xf32>) -> tensor<3x15x13x7xf32> {
- // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %arg0
+ // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %arg0
// CHECK-SAME{literal}: [[0], [1, 2, 3]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x15x13x7xf32>
- // CHECK: %[[GENERIC:.+]] = linalg.generic
+ // CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#map, #map1]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
// CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xf32>)
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %arg0
// CHECK-SAME{literal}: [[0], [1, 2, 3]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x15x13x7xf32>
- // CHECK: %[[GENERIC:.+]] = linalg.generic
+ // CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#map, #map1]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
// CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xf32>)
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %arg0
// CHECK-SAME{literal}: [[0], [1, 2, 3]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x15x13x7xi8>
- // CHECK: %[[GENERIC:.+]] = linalg.generic
+ // CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#map, #map1]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
// CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>)
// CHECK: #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// CHECK-LABEL: @broadcast_resize_nearest_i32
func.func @broadcast_resize_nearest_i32(%arg0 : tensor<3x1x1x7xi8>) -> tensor<3x15x13x7xi32> {
- // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %arg0
+ // CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %arg0
// CHECK-SAME{literal}: [[0], [1, 2, 3]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x15x13x7xi32>
- // CHECK: %[[GENERIC:.+]] = linalg.generic
+ // CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#map, #map1]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
// CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>)
// CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xi32>)
// CHECK-NEXT: ^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i32):
- // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32
+ // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32
// CHECK: linalg.yield %[[EXT]]
%resize = "tosa.resize"(%arg0) {mode = "NEAREST_NEIGHBOR", scale = [2, 2, 1, 1], offset = [0, 0], border = [0, 0]} : (tensor<3x1x1x7xi8>) -> tensor<3x15x13x7xi32>
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape %arg0
// CHECK-SAME{literal}: [[0], [1, 2, 3]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x15x13x7xi32>
- // CHECK: %[[GENERIC:.+]] = linalg.generic
+ // CHECK: %[[GENERIC:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#map, #map1]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
// CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>)
// CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xi32>)
// CHECK-NEXT: ^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i32):
- // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32
+ // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : i32
// CHECK: %[[MUL1:.+]] = arith.muli %[[EXT]], %[[C2]] : i32
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : i32
// -----
// CHECK-LABEL: @concat_non_axis_dyn
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
// CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]
func.func @concat_non_axis_dyn(%arg0: tensor<5x?xf32>, %arg1: tensor<6x?xf32>) -> () {
// CHECK: %[[AXIS:.+]] = arith.constant 0
// -----
// CHECK-LABEL: @concat_axis_dyn
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
-// CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
func.func @concat_axis_dyn(%arg0: tensor<?x3xf32>, %arg1: tensor<?x3xf32>) -> () {
// CHECK: %[[AXIS:.+]] = arith.constant 0
// CHECK: %[[STRIDE:.+]] = arith.constant 1
// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: @rescale_i8
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @rescale_i8(%arg0 : tensor<2xi8>) -> () {
// CHECK: [[C0:%.+]] = arith.constant 19689
// CHECK: [[C1:%.+]] = arith.constant 15
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: @rescale_i8_dyn_batch
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @rescale_i8_dyn_batch(%arg0 : tensor<?x2xi8>) -> () {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// CHECK-LABEL: @rescale_dyn
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @rescale_dyn(%arg0 : tensor<1x?x?x32xi32>) -> () {
// CHECK: %[[C1:.+]] = arith.constant 1
// CHECK: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: @rescale_ui8
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @rescale_ui8(%arg0 : tensor<2xui8>) -> () {
// CHECK: [[C0:%.+]] = arith.constant 19689
// CHECK: [[C1:%.+]] = arith.constant 15
// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: @rescale_per_channel
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @rescale_per_channel(%arg0 : tensor<3xi8>) -> (tensor<3xi8>) {
// CHECK: [[MULTIPLIERS:%.+]] = arith.constant dense<[42, 43, 0]>
// CHECK: [[SHIFTS:%.+]] = arith.constant dense<[14, 15, 0]>
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: @reverse
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @reverse(%arg0: tensor<5x4xi32>) -> () {
// CHECK: %[[C0:.+]] = arith.constant 0
// CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C0]]
// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
// CHECK-LABEL: @reverse_dyn
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @reverse_dyn(%arg0: tensor<?xi32>) -> () {
// CHECK: %[[C0_1:.+]] = arith.constant 0
// CHECK: %[[D0_1:.+]] = tensor.dim %[[ARG0]], %[[C0_1]]
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// CHECK-LABEL: @tile_dyn_input
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @tile_dyn_input(%arg0 : tensor<?x3xi8>) -> () {
// CHECK: %[[CST0:.+]] = arith.constant 0
// CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] : tensor<?x3xi8>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// CHECK-LABEL: @tile_dyn_multiples
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @tile_dyn_multiples(%arg0 : tensor<2x3xi8>) -> () {
// CHECK: %[[CST1:.+]] = arith.constant 1
// CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] : tensor<2x3xi8>
// -----
// CHECK-LABEL: @pad_float
-// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
func.func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
%0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
// TODO: Output contains multiple "arith.constant 1 : index".
%cst_0 = arith.constant dense<0> : vector<32x8xi8>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
- %c17 = arith.constant 17 : index
- %c39 = arith.constant 39 : index
- %c40 = arith.constant 40 : index
- %c49 = arith.constant 49 : index
- %c50 = arith.constant 50 : index
+ %c17 = arith.constant 17 : index
+ %c39 = arith.constant 39 : index
+ %c40 = arith.constant 40 : index
+ %c49 = arith.constant 49 : index
+ %c50 = arith.constant 50 : index
%cst = arith.constant 0 : i8
%cst0 = arith.constant 0 : i32
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}]
// CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xi8, 3>
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}]
- // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB4_map]]()[{{%.+}}]
+ // CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB4_map]]()[{{%.+}}]
// CHECK: memref.load %arg1[[[row]], [[col]]] : memref<128x128xi8, 3>
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB5_map]]()[{{%.+}}]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colB0_map]]()[{{%.+}}]
%cst_0 = arith.constant dense<0.0> : vector<4x8xf64>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
- %c17 = arith.constant 17 : index
- %c39 = arith.constant 39 : index
- %c40 = arith.constant 40 : index
- %c49 = arith.constant 49 : index
- %c50 = arith.constant 50 : index
+ %c17 = arith.constant 17 : index
+ %c39 = arith.constant 39 : index
+ %c40 = arith.constant 40 : index
+ %c49 = arith.constant 49 : index
+ %c50 = arith.constant 50 : index
%cst = arith.constant 0.0 : f64
%cst0 = arith.constant 0.0 : f64
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowC0_map]]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colC0_map]]
- // CHECK: vector.load %arg2[[[row]], [[col]]] : memref<128x128xf64>, vector<2xf64>
+ // CHECK: vector.load %arg2[[[row]], [[col]]] : memref<128x128xf64>, vector<2xf64>
%A = vector.transfer_read %arg0[%c1, %c1], %cst {in_bounds = [true, true]} : memref<128x128xf64>, vector<8x4xf64>
%B = vector.transfer_read %arg1[%c39, %c40], %cst {in_bounds = [true, true], permutation_map = #map0} : memref<128x128xf64>, vector<8x4xf64>
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowC0_map]]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colC0_map]]
- // CHECK: vector.store {{%.+}}, %arg2[[[row]], [[col]]] : memref<128x128xf64>, vector<2xf64>
+ // CHECK: vector.store {{%.+}}, %arg2[[[row]], [[col]]] : memref<128x128xf64>, vector<2xf64>
vector.transfer_write %D, %arg2[%c49, %c40] {in_bounds = [true, true]} : vector<8x8xf64>, memref<128x128xf64>
return
}
%cst = arith.constant 0.000000e+00 : f16
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowA_map]]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colA_map]]
- // CHECK: nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 4 : i32, transpose = false}
+ // CHECK: nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 4 : i32, transpose = false}
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB_map]]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colB_map]]
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowA_map]]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colA_map]]
- // CHECK: [[fragmentA:%.+]] = nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 4 : i32, transpose = false}
+ // CHECK: [[fragmentA:%.+]] = nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 4 : i32, transpose = false}
%A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<42x32xf16, 3>, vector<16x16xf16>
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB_map]]
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%cst = arith.constant 0.000000e+00 : f16
-
+
// CHECK-DAG: [[row:%.+]] = affine.apply [[$Arow_map]]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$Acol_map]]
// CHECK: nvgpu.ldmatrix %arg0[[[C0]], [[row]], [[col]]] {numTiles = 4 : i32, transpose = false} : memref<2x20x20xf16, 3> -> vector<4x2xf16>
%A = vector.transfer_read %arg0[%c0, %c1, %c3], %cst {in_bounds = [true, true]} : memref<2x20x20xf16, 3>, vector<16x16xf16>
-
+
// CHECK-DAG: [[row:%.+]] = affine.apply [[$Brow_map]]
- // CHECK-DAG: [[col:%.+]] = affine.apply [[$Bcol_map]]
+ // CHECK-DAG: [[col:%.+]] = affine.apply [[$Bcol_map]]
// CHECK: nvgpu.ldmatrix %arg1[[[C0]], [[row]], [[col]]] {numTiles = 2 : i32, transpose = true} : memref<2x20x20xf16, 3> -> vector<2x2xf16>
%B = vector.transfer_read %arg1[%c0, %c3, %c3], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<2x20x20xf16, 3>, vector<8x16xf16>
-
+
// CHECK-DAG: [[row:%.+]] = affine.apply [[$Arow_map]]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$Acol_map]]
// CHECK: nvgpu.ldmatrix %arg2[[[C0]], [[row]], [[col]]] {numTiles = 2 : i32, transpose = false} : memref<2x20x20xf16, 3> -> vector<2x2xf16>
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colA_map]]
// CHECK: nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 4 : i32
// CHECK-SAME: transpose = false
-
+
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB_map]]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colB_map]]
// CHECK: nvgpu.ldmatrix %arg1[[[row]], [[col]]] {numTiles = 2 : i32
// CHECK-SAME: transpose = false
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowA_map]]
- // CHECK-DAG: [[col:%.+]] = affine.apply [[$colA_map]]
+ // CHECK-DAG: [[col:%.+]] = affine.apply [[$colA_map]]
// CHECK: nvgpu.ldmatrix %arg2[[[row]], [[col]]] {numTiles = 2 : i32
// CHECK-SAME: transpose = false
%A = vector.transfer_read %arg0[%c1, %c3], %cst {in_bounds = [true, true]} : memref<20x20xf16, 3>, vector<16x16xf16>
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowA_map]]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colA_map]]
- // CHECK: [[a_frag:%.+]] = nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 2 : i32, transpose = false}
+ // CHECK: [[a_frag:%.+]] = nvgpu.ldmatrix %arg0[[[row]], [[col]]] {numTiles = 2 : i32, transpose = false}
// b and c are not loaded by ldmatrix in this test.
// CHECK-NOT: nvgpu.ldmatrix
// CHECK-DAG: [[row:%.+]] = affine.apply [[$rowB_map]]
// CHECK-DAG: [[col:%.+]] = affine.apply [[$colB_map]]
- // CHECK: [[b_el:%.+]] = memref.load {{%.+}} : memref<20x20xf32, 3>
+ // CHECK: [[b_el:%.+]] = memref.load {{%.+}} : memref<20x20xf32, 3>
// CHECK: [[b_frag:%.+]] = vector.insert [[b_el]], {{.*}} : f32 into vector<1x1xf32>
// CHECK: [[d_frag:%.+]] = nvgpu.mma.sync([[a_frag]], [[b_frag]], [[c_frag]])
// CHECK-SAME: mmaShape = [16, 8, 4]
// CHECK-SAME: -> vector<2x2xf32>
%A = vector.transfer_read %arg0[%c1, %c3], %cst {in_bounds = [true, true]} : memref<20x20xf32, 3>, vector<16x4xf32>
- %B = vector.transfer_read %arg1[%c3, %c3], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<20x20xf32, 3>, vector<8x4xf32>
+ %B = vector.transfer_read %arg1[%c3, %c3], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<20x20xf32, 3>, vector<8x4xf32>
%D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %cst_0 : vector<16x4xf32>, vector<8x4xf32> into vector<16x8xf32>
// CHECK: vector.extract [[d_frag]][0] : vector<2x2xf32>
// CHECK: [[row:%.+]] = affine.apply [[$rowB0_map]]()[[[lane]]]
// CHECK: [[col:%.+]] = affine.apply [[$colB0_map]]()[[[lane]]]
// CHECK: nvgpu.ldmatrix %arg1[[[row]], [[col]]] {numTiles = 2 : i32, transpose = false} : memref<128x128xi4, 3> -> vector<2x8xi4>
-
+
// CHECK: [[lane:%.+]] = gpu.lane_id
// CHECK: [[row:%.+]] = affine.apply [[$rowC0_map]]()[{{%.+}}]
// CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[{{%.+}}]
%D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x64xi4>, vector<8x64xi4> into vector<16x8xi32>
// CHECK: [[lane:%.+]] = gpu.lane_id
- // CHECK: [[v:%.+]] = vector.extract [[d]][0] : vector<2x2xi32>
+ // CHECK: [[v:%.+]] = vector.extract [[d]][0] : vector<2x2xi32>
// CHECK: [[row:%.+]] = affine.apply [[$rowC0_map]]()[[[lane]]]
// CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[[[lane]]]
// CHECK: vector.store [[v]], %arg2[[[row]], [[col]]] : memref<128x128xi32>, vector<2xi32>
-
+
// CHECK: [[v:%.+]] = vector.extract [[d]][1] : vector<2x2xi32>
// CHECK: [[row:%.+]] = affine.apply [[$rowC8_map]]()[[[lane]]]
// CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[[[lane]]]
// CHECK: [[row:%.+]] = affine.apply [[$rowB0_map]]()[[[lane]]]
// CHECK: [[col:%.+]] = affine.apply [[$colB0_map]]()[[[lane]]]
// CHECK: nvgpu.ldmatrix %arg1[[[row]], [[col]]] {numTiles = 2 : i32, transpose = false} : memref<128x128xi8, 3> -> vector<2x4xi8>
-
+
// CHECK: [[lane:%.+]] = gpu.lane_id
// CHECK: [[row:%.+]] = affine.apply [[$rowC0_map]]()[[[lane]]]
// CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[[[lane]]]
%D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %A, %B, %C : vector<16x32xi8>, vector<8x32xi8> into vector<16x8xi32>
// CHECK: [[lane:%.+]] = gpu.lane_id
- // CHECK: [[v:%.+]] = vector.extract [[d]][0] : vector<2x2xi32>
+ // CHECK: [[v:%.+]] = vector.extract [[d]][0] : vector<2x2xi32>
// CHECK: [[row:%.+]] = affine.apply [[$rowC0_map]]()[[[lane]]]
// CHECK: [[col:%.+]] = affine.apply [[$colC0_map]]()[[[lane]]]
// CHECK: vector.store [[v]], %arg2[[[row]], [[col]]] : memref<128x128xi32>, vector<2xi32>
// CHECK-SAME: -> vector<2x2xf32>
%A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true], permutation_map = #map0} : memref<20x20xf32, 3>, vector<16x8xf32>
%B = vector.transfer_read %arg1[%c0, %c0], %cst {in_bounds = [true, true]} : memref<20x20xf32, 3>, vector<8x8xf32>
- %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"],
+ %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"],
kind = #vector.kind<add>} %A, %B, %cst_0 : vector<16x8xf32>, vector<8x8xf32> into vector<16x8xf32>
// CHECK: vector.extract [[d_frag]][0] : vector<2x2xf32>
// -----
-
+
func.func @shuffle_0D_direct(%arg0: vector<f32>) -> vector<3xf32> {
%1 = vector.shuffle %arg0, %arg0 [0, 1, 0] : vector<f32>, vector<f32>
return %1 : vector<3xf32>
// CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.apply #map{{[0-9]*}}(%arg0)
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
- // CHECK-NEXT: affine.store
+ // CHECK-NEXT: affine.store
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.store %[[mul]]
- // CHECK-NEXT: affine.store
+ // CHECK-NEXT: affine.store
return
}
// CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32
// CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
- // CHECK-NEXT: affine.for
- // CHECK-NEXT: affine.store
+ // CHECK-NEXT: affine.for
+ // CHECK-NEXT: affine.store
// CHECK-NEXT: affine.load
return
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
// CHECK-NEXT: affine.store
// CHECK-NEXT: }
// CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
- // CHECK-NEXT: affine.store
+ // CHECK-NEXT: affine.store
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
// CHECK-NEXT: affine.for %[[arg1:.*]] = 0 to 10 {
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
// CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: affine.store {{.*}}[%[[arg1]]] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
// CHECK-NEXT: affine.for %[[arg1:.*]] = 0 to 10 {
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
// CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
// CHECK-NEXT: } else {
// CHECK-NEXT: affine.store {{.*}}[%[[arg1]]] : memref<10xf32>
// CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
// CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
// CHECK-NEXT: } else {
// CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: }
// CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
// CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
// CHECK-NEXT: affine.for %[[arg1:.*]] = 0 to 10 {
- // CHECK-NEXT: affine.if
+ // CHECK-NEXT: affine.if
// CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
// CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
// CHECK-NEXT: affine.for %[[arg2:.*]] = 0 to 10 {
// CHECK: memref.alloc() : memref<100xf32>
// CHECK-NEXT: memref.alloc() : memref<100xf32>
// CHECK-NEXT: arith.constant 0 : index
- // CHECK-NEXT: affine.load
+ // CHECK-NEXT: affine.load
// CHECK-NEXT: affine.for %{{.*}} = 0 to 5 {
// CHECK-NEXT: affine.store
// CHECK-NEXT: arith.constant 0 : index
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.load
- // CHECK-NEXT: affine.for
+ // CHECK-NEXT: affine.for
// CHECK-NEXT: affine.store %[[cst]]
// CHECK-NEXT: %{{.*}} = affine.load %[[ARG0]][%[[IIIV]], %[[KKIV]]] : memref<1024x1024xf32>
// CHECK-NEXT: %{{.*}} = affine.load %[[ARG1]][%[[KKIV]], %[[JJIV]]] : memref<1024x1024xf32>
// CHECK-NEXT: %{{.*}} = affine.load %[[ARG2]][%[[IIIV]], %[[JJIV]]] : memref<1024x1024xf32>
-// CHECK-NEXT: %{{.*}} = arith.mulf
-// CHECK-NEXT: %{{.*}} = arith.addf
+// CHECK-NEXT: %{{.*}} = arith.mulf
+// CHECK-NEXT: %{{.*}} = arith.addf
// CHECK-NEXT: affine.store %{{.*}}, %[[ARG2]]{{.*}} : memref<1024x1024xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
//
// GENERIC: "affine.for"() ({
- // GENERIC-NEXT: ^bb0(%{{.*}}: index):
+ // GENERIC-NEXT: ^bb0(%{{.*}}: index):
// GENERIC-NEXT: "affine.yield"() : () -> ()
// GENERIC-NEXT: }) {lower_bound = #map, step = 1 : index, upper_bound = #map1} : () -> ()
affine.for %i = 0 to 10 {
-// RUN: mlir-opt --test-transform-dialect-interpreter --split-input-file -canonicalize -cse --verify-diagnostics %s
+// RUN: mlir-opt --test-transform-dialect-interpreter --split-input-file -canonicalize -cse --verify-diagnostics %s
-func.func @map_nested_foreach_to_threads_not_gpu_launch() -> () {
+func.func @map_nested_foreach_to_threads_not_gpu_launch() -> () {
%1 = tensor.empty() : tensor<4xf32>
- return
+ return
}
transform.sequence failures(propagate) {
^bb0(%arg0: !pdl.operation):
- %funcop = transform.structured.match ops{["tensor.empty"]} in %arg0
+ %funcop = transform.structured.match ops{["tensor.empty"]} in %arg0
// expected-error @below {{Given target is not gpu.launch}}
%1 = transform.gpu.map_nested_foreach_to_threads %funcop
}
{
%t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) outs(%z : tensor<32x32xf32>) -> tensor<32x32xf32>
gpu.terminator
- }
- return
+ }
+ return
}
transform.sequence failures(propagate) {
%matmul = transform.structured.match ops{["linalg.matmul"]} in %arg0
%foreach, %tiled = transform.structured.tile_to_foreach_thread_op %matmul num_threads [10, 20, 30] (mapping = [ #gpu.thread<y>, #gpu.thread<x>, #gpu.thread<z> ] )
%funcop = transform.structured.match ops{["gpu.launch"]} in %arg0
- // expected-error @below {{only bufferized scf.foreach_thread lowers to gpu.thread_id}}
+ // expected-error @below {{only bufferized scf.foreach_thread lowers to gpu.thread_id}}
transform.gpu.map_nested_foreach_to_threads %funcop { blockDim = [128, 4, 1] }
}
// -----
-func.func @map_foreach_to_blocks_not_gpu_launch() -> () {
+func.func @map_foreach_to_blocks_not_gpu_launch() -> () {
// expected-note @below {{when applied to this payload op}}
%1 = tensor.empty() : tensor<4xf32>
- return
+ return
}
transform.sequence failures(propagate) {
^bb0(%arg0: !pdl.operation):
- %funcop = transform.structured.match ops{["tensor.empty"]} in %arg0
+ %funcop = transform.structured.match ops{["tensor.empty"]} in %arg0
// expected-error @below {{Given target is not gpu.launch}}
%1 = transform.gpu.map_foreach_to_blocks %funcop
}
}
transform.sequence failures(propagate) {
-^bb0(%arg0: !pdl.operation):
- %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0
- // expected-error @below {{could not find a unique topLevel scf.foreach_thread}}
+^bb0(%arg0: !pdl.operation):
+ %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0
+ // expected-error @below {{could not find a unique topLevel scf.foreach_thread}}
%1 = transform.gpu.map_foreach_to_blocks %funcop
}
%6 = math.fma %alpha, %4, %5 : f32
memref.store %6, %y[%i, %j] : memref<2 x 32 x f32>
} { mapping = [#gpu.thread<y>, #gpu.thread<x>] }
-
+
return %y : memref<2 x 32 x f32>
}
transform.sequence failures(propagate) {
-^bb0(%arg0: !pdl.operation):
- %funcop = transform.structured.match ops{["func.func"]} in %arg0
- // expected-error @below {{could not find a unique topLevel scf.foreach_thread}}
+^bb0(%arg0: !pdl.operation):
+ %funcop = transform.structured.match ops{["func.func"]} in %arg0
+ // expected-error @below {{could not find a unique topLevel scf.foreach_thread}}
%1 = transform.gpu.map_foreach_to_blocks %funcop { generate_gpu_launch }
}
}
transform.sequence failures(propagate) {
-^bb0(%arg0: !pdl.operation):
- %funcop = transform.structured.match ops{["func.func"]} in %arg0
+^bb0(%arg0: !pdl.operation):
+ %funcop = transform.structured.match ops{["func.func"]} in %arg0
// expected-error @below {{Trying to launch a GPU kernel with gridDim = (65535, 65535, 1) blockDim = (1, 1, 1). It is larger than the limits.}}
%1 = transform.gpu.map_foreach_to_blocks %funcop { generate_gpu_launch }
}
// CHECK: nvvm.bar.warp.sync %{{.*}}
nvvm.bar.warp.sync %mask : i32
llvm.return
-}
+}
// CHECK-LABEL: @nvvm_mma_m8n8k4_row_col_f32_f32
func.func @nvvm_mma_m8n8k4_row_col_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
// CHECK-LABEL: func @collapse_reduction
// CHECK: %[[T:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<2x32x10x4096xf32> into tensor<2x32x40960xf32>
-// CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]],
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]}
+// CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]],
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]}
// CHECK-SAME: ins(%[[T]] : tensor<2x32x40960xf32>) outs(%{{.*}} : tensor<2x32xf32>) {
// CHECK: } -> tensor<2x32xf32>
// CHECK-LABEL: func @collapse_parallel
// CHECK-DAG: %[[S:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<32x2x10x4096xf32> into tensor<32x2x40960xf32>
// CHECK-DAG: %[[D:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<2x32x10x4096xf32> into tensor<2x32x40960xf32>
-// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]],
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]}
+// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]],
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]}
// CHECK-SAME: ins(%[[S]] : tensor<32x2x40960xf32>) outs(%[[D]] : tensor<2x32x40960xf32>) {
// CHECK: } -> tensor<2x32x40960xf32>
// CHECK: tensor.expand_shape %[[R]] {{\[}}[0], [1], [2, 3]] : tensor<2x32x40960xf32> into tensor<2x32x10x4096xf32>
%init1 = tensor.empty(%d1, %d0) : tensor<?x?xf32>
%init2 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%result:2 = linalg.generic {
- indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>,
+ indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1, d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
%init1 = tensor.empty(%d1, %d0) : tensor<?x?xf32>
%init2 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%result:3 = linalg.generic {
- indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>,
+ indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d1, d0)>,
affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<f32>
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<f32>
%4 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %1 : tensor<f32>, tensor<f32>)
outs(%3 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%5 = arith.mulf %arg3, %arg4 : f32
linalg.yield %5 : f32
} -> tensor<f32>
%7 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%1, %4 : tensor<f32>, tensor<f32>)
outs(%6 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%5 = arith.divf %arg3, %arg4 : f32
linalg.yield %5 : f32
} -> tensor<f32>
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.addf %arg3, %arg4 : f32
%3 = arith.mulf %2, %arg4 : f32
linalg.yield %3 : f32
%1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []}
ins(%arg1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = "foreign.do_something"(%arg3, %arg4) {} : (f32, f32) -> f32
linalg.yield %2 : f32
} -> tensor<f32>
{indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
ins(%arg0_t : tensor<i1>)
outs(%2 : tensor<i8>) {
- ^bb0(%arg2: i1, %arg3: i8):
+ ^bb0(%arg2: i1, %arg3: i8):
%10 = arith.extui %arg2 : i1 to i8
linalg.yield %10 : i8
} -> tensor<i8>
{indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
ins(%arg1_t, %cst : tensor<i32>, tensor<i32>)
outs(%6 : tensor<i32>) {
- ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
+ ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
%10 = arith.addi %arg2, %arg3 : i32
linalg.yield %10 : i32
} -> tensor<i32>
%4 = linalg.generic #attrs
ins(%2, %1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
%8 = linalg.generic #attrs
ins(%6, %6 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
%9 = arith.addi %arg0, %arg1 : i32
linalg.yield %9 : i32
} -> tensor<i32>
%4 = linalg.generic #attrs
ins(%2, %1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
%8 = linalg.generic #attrs
ins(%6, %6 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
%9 = arith.addi %arg0, %arg1 : i32
linalg.yield %9 : i32
} -> tensor<i32>
%4 = linalg.generic #attrs
ins(%2, %1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
%8 = linalg.generic #attrs
ins(%6, %12 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
%9 = arith.addi %arg0, %arg1 : i32
linalg.yield %9 : i32
} -> tensor<i32>
%2 = linalg.generic #attrs
ins(%0, %farg1 : tensor<i32>, tensor<i32>)
outs(%1 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
%6 = linalg.generic #attrs
ins(%4, %4 : tensor<i32>, tensor<i32>)
outs(%5 : tensor<i32>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
%8 = arith.addi %arg0, %arg1 : i32
linalg.yield %8 : i32
} -> tensor<i32>
%4 = linalg.generic #attrs
ins(%2, %farg1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
// DET-ALL: ^[[bb1]](%{{.*}}: tensor<10xi32>)
// DET-ALL: tensor.empty() : tensor<i32>
// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
-// DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
+// DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
// DET-ALL: %{{.*}} = arith.addi %{{.*}}, %{{.*}}
// DET-ALL: linalg.yield %{{.*}} : i32
// DET-ALL: } -> tensor<i32>
%4 = linalg.generic #attrs
ins(%2, %reshaped1 : tensor<i32>, tensor<i32>)
outs(%3 : tensor<i1>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
%8 = arith.cmpi slt, %arg0, %arg1 : i32
linalg.yield %8 : i1
} -> tensor<i1>
%8 = linalg.generic #attrs
ins(%6, %6 : tensor<i32>, tensor<i32>)
outs(%7 : tensor<i32>) {
- ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
+ ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
%9 = arith.addi %arg0, %arg1 : i32
linalg.yield %9 : i32
} -> tensor<i32>
}
// CHECK-LABEL: func @drop_all_loops
-// CHECK: memref.collapse_shape
+// CHECK: memref.collapse_shape
// CHECK-SAME: [] : memref<1x1xf32, 3> into memref<f32, 3>
// CHECK: linalg.generic{{.*}}memref<f32, 3>
%1 = arith.addf %b0, %b0: f32
linalg.yield %1, %1, %1, %1 : f32, f32, f32, f32
} -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
- return %0#0, %0#2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
+ return %0#0, %0#2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) :
linalg.yield %b0, %b0, %b3, %b4 : f32, f32, f32, f32
} -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
- return %0#0, %0#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
+ return %0#0, %0#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
%3 = arith.addf %b0, %b4: f32
linalg.yield %1, %1, %2, %3 : f32, f32, f32, f32
} -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>)
- return %0#0, %0#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
+ return %0#0, %0#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
%0 = linalg.generic #trait
ins(%arg0 : tensor<1x5xf32>)
outs(%shape : tensor<5xf32>) {
- ^bb0(%arg2: f32, %arg3: f32):
+ ^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
} -> tensor<5xf32>
return %0 : tensor<5xf32>
// RUN: mlir-opt %s -test-linalg-elementwise-fusion-patterns=fuse-with-reshape-by-collapsing -split-input-file | FileCheck %s
// RUN: mlir-opt %s -test-linalg-elementwise-fusion-patterns=fuse-with-reshape-by-collapsing-control -split-input-file | FileCheck %s --check-prefix=CONTROL
-// Static problem sizes. Checks all aspects of fusion by collapsing. Rest of the
+// Static problem sizes. Checks all aspects of fusion by collapsing. Rest of the
// tests only check a subset of conditions.
#map0 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d4, d5, d6, d7)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2)>
%3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%4 = arith.addf %arg3, %arg4 : f32
linalg.yield %4 : f32
} -> tensor<?x?xf32>
// CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]]
// CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]]
// CHECK-SAME: [[ARG2:%[a-zA-Z0-9_]*]]
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
+ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
// CHECK: [[T1:%[a-zA-Z0-9_]*]] = arith.addf [[ARG0]], [[ARG1]]
// CHECK-NOT: linalg.yield
// CHECK: arith.mulf [[T1]], [[ARG2]]
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, f32)
outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%4 = arith.addf %arg3, %arg4 : f32
linalg.yield %4 : f32
} -> tensor<?x?xf32>
// CHECK-SAME: [[ARG3:%[a-zA-Z0-9_]*]]
// CHECK-SAME: [[ARG4:%[a-zA-Z0-9_]*]]
// CHECK-SAME: [[ARG5:%[a-zA-Z0-9_]*]]
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
+ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
// CHECK: [[T1:%[a-zA-Z0-9_]*]] = arith.addf [[ARG3]], [[ARG4]]
// CHECK-NOT: linalg.yield
// CHECK: arith.mulf [[T1]], [[ARG5]]
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%4 = arith.addf %arg3, %arg4 : f32
linalg.yield %4 : f32
} -> tensor<?x?xf32>
%4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%3, %arg2 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
+ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
%5 = arith.mulf %arg5, %arg6 : f32
linalg.yield %5 : f32
} -> tensor<?x?xf32>
%3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%4 = arith.addf %arg3, %arg4 : f32
linalg.yield %4 : f32
} -> tensor<?x?xf32>
%4 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%3, %arg2 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%2 : tensor<?x?xf32>){
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
+ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
%5 = arith.mulf %arg5, %arg6 : f32
linalg.yield %5 : f32
} -> tensor<?x?xf32>
%2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]}
ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
outs(%1 : tensor<?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%3 = arith.addf %arg3, %arg4 : f32
linalg.yield %3 : f32
} -> tensor<?xf32>
%5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]}
ins(%2, %arg2 : tensor<?xf32>, tensor<?x?xf32>)
outs(%4 : tensor<?x?xf32>){
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
+ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
%6 = arith.mulf %arg5, %arg6 : f32
linalg.yield %6 : f32
} -> tensor<?x?xf32>
%1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []}
ins(%arg0, %arg1 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<f32>
%2 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []}
ins(%1, %arg2 : tensor<f32>, tensor<f32>)
outs(%0 : tensor<f32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%3 = arith.mulf %arg3, %arg4 : f32
linalg.yield %3 : f32
} -> tensor<f32>
iterator_types = ["parallel", "parallel"] }
ins(%arg0, %arg1 : tensor<?x?xi32>, tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
+ ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
%10 = arith.addi %arg2, %arg3 : i32
linalg.yield %10 : i32
} -> tensor<?x?xi32>
iterator_types = ["parallel", "parallel"] }
ins(%3 : tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg2: i32, %arg3: i32):
+ ^bb0(%arg2: i32, %arg3: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%5 = arith.index_cast %idx0 : index to i32
iterator_types = ["parallel", "parallel"] }
ins(%arg0 : tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg4: i32, %arg5: i32):
+ ^bb0(%arg4: i32, %arg5: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%4 = arith.index_cast %idx0 : index to i32
iterator_types = ["parallel", "parallel"] }
ins(%3, %arg0 : tensor<?x?xi32>, tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
+ ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
%10 = arith.addi %arg2, %arg3 : i32
linalg.yield %10 : i32
} -> tensor<?x?xi32>
iterator_types = ["parallel", "parallel"] }
ins(%arg0 : tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg2: i32, %arg3: i32):
+ ^bb0(%arg2: i32, %arg3: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%4 = arith.index_cast %idx0 : index to i32
iterator_types = ["parallel", "parallel"] }
ins(%3 : tensor<?x?xi32>)
outs(%2 : tensor<?x?xi32>) {
- ^bb0(%arg2: i32, %arg3: i32):
+ ^bb0(%arg2: i32, %arg3: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%5 = arith.index_cast %idx0 : index to i32
{indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>],
iterator_types = []}
ins(%arg1 : tensor<i32>) outs(%0 : tensor<f32>) {
- ^bb0(%arg2: i32, %arg3: f32):
+ ^bb0(%arg2: i32, %arg3: f32):
%3 = arith.index_cast %arg2 : i32 to index
%4 = tensor.extract %arg0[%3, %c0, %c0] : tensor<5x1x1xf32>
linalg.yield %4 : f32
affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%1, %cst : tensor<f32>, tensor<10xf32>) outs(%2 : tensor<10xf32>) {
- ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
+ ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
%4 = arith.mulf %arg2, %arg3 : f32
linalg.yield %4 : f32
} -> tensor<10xf32>
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<1x10xf32>, tensor<1x10xf32>)
outs(%init : tensor<1x10xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<1x10xf32>
iterator_types = ["reduction"]}
ins(%0 : tensor<1x10xf32>)
outs(%arg2 : tensor<1xf32>) {
- ^bb0(%arg3: f32, %arg4: f32):
+ ^bb0(%arg3: f32, %arg4: f32):
%2 = arith.addf %arg3, %arg4 : f32
linalg.yield %2 : f32
} -> tensor<1xf32>
iterator_types = ["parallel", "parallel"]
}
outs(%init0 : tensor<?x1xf32>) {
- ^bb0(%a: f32):
+ ^bb0(%a: f32):
linalg.yield %cp5 : f32
} -> tensor<?x1xf32>
%d0 = tensor.dim %0, %c0 : tensor<?x1xf32>
}
ins(%0, %1 : tensor<?x1xf32>, tensor<?x1xf32>)
outs(%init1 : tensor<?x1xf32>) {
- ^bb0(%a: f32, %b: f32, %c: f32):
+ ^bb0(%a: f32, %b: f32, %c: f32):
%m = arith.mulf %a, %b : f32
linalg.yield %m : f32
} -> tensor<?x1xf32>
indexing_maps = [#map0, #map1],
iterator_types = ["parallel", "parallel"]
} ins(%arg0 : tensor<f32>) outs(%4 : tensor<?x?xf32>) {
- ^bb0(%arg2: f32, %arg3: f32):
+ ^bb0(%arg2: f32, %arg3: f32):
linalg.yield %arg2 : f32
} -> tensor<?x?xf32>
%6 = tensor.empty(%arg1) : tensor<?xf32>
indexing_maps = [#map2, #map3],
iterator_types = ["parallel", "reduction"]
} ins(%5 : tensor<?x?xf32>) outs(%7 : tensor<?xf32>) {
- ^bb0(%arg2: f32, %arg3: f32):
+ ^bb0(%arg2: f32, %arg3: f32):
%9 = arith.maxf %arg2, %arg3 : f32
linalg.yield %9 : f32
} -> tensor<?xf32>
// -----
// CHECK-LABEL: func @fold_fill_generic_basic
-// CHECK-SAME: (%[[ARG0:.*]]: tensor<?xf32>) -> tensor<?xf32> {
+// CHECK-SAME: (%[[ARG0:.*]]: tensor<?xf32>) -> tensor<?xf32> {
// CHECK-NOT: linalg.fill
// CHECK: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]] : tensor<?xf32>)
// -----
// CHECK-LABEL: func @fold_fill_generic_different_dtype
-// CHECK-SAME: (%[[ARG0:.*]]: tensor<?xf16>) -> tensor<?xf16> {
+// CHECK-SAME: (%[[ARG0:.*]]: tensor<?xf16>) -> tensor<?xf16> {
// CHECK-NOT: linalg.fill
// CHECK: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]] : tensor<?xf16>)
linalg.generic #pointwise_2d_trait
ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
outs(%C : memref<?x?xf32>) {
- ^bb0(%e: f32, %arg5: f32, %arg6: f32):
+ ^bb0(%e: f32, %arg5: f32, %arg6: f32):
%2 = arith.addf %e, %arg5 : f32
linalg.yield %2 : f32
}
indexing_maps = [affine_map<(i, j) -> (j, i)>],
iterator_types = ["parallel", "parallel"]}
outs(%A : memref<?x?xindex>) {
- ^bb0(%a: index):
+ ^bb0(%a: index):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%0 = arith.addi %idx0, %idx1 : index
indexing_maps = [affine_map<(i, j) -> (i, j)>],
iterator_types = ["parallel", "parallel"]}
outs(%A : memref<?x?xindex>) {
- ^bb0(%a: index):
+ ^bb0(%a: index):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%0 = arith.addi %idx0, %idx1 : index
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %B : tensor<?x112x16xf32>, tensor<16xf32>)
outs(%init : tensor<?x112x16xf32>) {
- ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
+ ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
%s = arith.subf %arg1, %arg2 : f32
linalg.yield %s : f32
} -> tensor<?x112x16xf32>
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %1, %C : tensor<112x112x16xf32>, tensor<112x112x16xf32>, tensor<16xf32>)
outs(%2 : tensor<112x112x16xf32>) {
- ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
+ ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
%s = arith.subf %arg1, %arg2 : f32
%m = arith.mulf %s, %arg3 : f32
linalg.yield %m : f32
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%20, %B : tensor<112x112x16xf32>, tensor<112xf32>)
outs(%21 : tensor<112x112x16xf32>) {
- ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
+ ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
%s = arith.subf %arg1, %arg2 : f32
linalg.yield %s : f32
} -> tensor<112x112x16xf32>
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%25, %arg1, %arg2 : tensor<2x3x5xi32>, tensor<5xf32>, tensor<5xf32>)
outs(%26 : tensor<2x3x5xf32>) {
- ^bb0(%arg6: i32, %arg7: f32, %arg8: f32, %arg9: f32):
+ ^bb0(%arg6: i32, %arg7: f32, %arg8: f32, %arg9: f32):
%29 = arith.sitofp %arg6 : i32 to f32
%30 = arith.addf %arg7, %cst_8 : f32
%31 = arith.divf %cst_7, %30 : f32
// -----
func.func @batch_reduce_gemm(%lhs: memref<7x8x9xf32>, %rhs: memref<7x9x8xf32>, %out: memref<8x8xf32>) {
- linalg.batch_reduce_matmul ins(%lhs, %rhs: memref<7x8x9xf32>, memref<7x9x8xf32>)
+ linalg.batch_reduce_matmul ins(%lhs, %rhs: memref<7x8x9xf32>, memref<7x9x8xf32>)
outs(%out: memref<8x8xf32>)
return
}
func.func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.pad %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst : f32
} : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>
return %0 : tensor<1x32x32x1xf32>
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f32
%out = tensor.pad %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] {
- ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index):
+ ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index):
tensor.yield %cst : f32
} : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32>
return %out : tensor<4x?x?x?xf32>
ins(%arg0, %scalar : tensor<4xf32>, tensor<f32>)
outs(%0 : tensor<4xf32>) {
// CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32)
- ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
+ ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
// CHECK: tensor.extract %[[SCALAR]][]
%2 = arith.divf %arg1, %arg2 : f32
linalg.yield %2 : f32
ins(%arg0, %scalar : tensor<4xf32>, tensor<1xf32>)
outs(%0 : tensor<4xf32>) {
// CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32)
- ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
+ ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
// CHECK: tensor.extract %[[SCALAR]][%[[ZERO]]]
%2 = arith.divf %arg1, %arg2 : f32
linalg.yield %2 : f32
outs(%C: memref<?x?xf32>)
return %C : memref<?x?xf32>
}
-
+
func.func @generic_const_init(%arg0: memref<?xf32>) {
%cst = arith.constant 1.0 : f32
linalg.generic #trait_const_fill outs(%arg0 : memref<?xf32>) {
- ^bb0(%arg1: f32):
+ ^bb0(%arg1: f32):
linalg.yield %cst : f32
}
return
%cst = arith.constant 0.000000e+00 : f32
%0 = bufferization.to_tensor %arg0 : memref<1x28x28x1xf32>
%1 = tensor.pad %0 low[1, 1, 1, 2] high[0, 2, 2, 0] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst : f32
} : tensor<1x28x28x1xf32> to tensor<2x31x31x3xf32>
%2 = bufferization.to_memref %1 : memref<2x31x31x3xf32>
func.func @pad_tensor_no_memrefs(%arg0: tensor<1x28x28xf32>) -> tensor<2x32x32xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.pad %arg0 low[1, 2, 2] high[0, 2, 2] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index):
+ ^bb0(%arg1: index, %arg2: index, %arg3: index):
tensor.yield %cst : f32
} : tensor<1x28x28xf32> to tensor<2x32x32xf32>
return %0 : tensor<2x32x32xf32>
func.func @pad_tensor_detailed(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = tensor.pad %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] {
- ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+ ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %cst : f32
} : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>
return %0 : tensor<1x32x32x1xf32>
%init = tensor.empty(%d0, %d1) : tensor<?x?xf32>
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
+ iterator_types = ["parallel", "parallel"]}
ins(%arg0 : tensor<?x?xf32>) outs(%init : tensor<?x?xf32>) {
^bb0(%arg6 : f32, %arg7 : f32):
%1 = arith.mulf %arg6, %arg6 : f32
// CHECK-DAG: %[[TARGET_D0:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG3]], %[[SOURCE_D0]]]
// CHECK-DAG: %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK-DAG: %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[SOURCE_D1]]]
-// CHECK: %[[INIT:.+]] = tensor.empty(%[[TARGET_D0]], %[[TARGET_D1]])
+// CHECK: %[[INIT:.+]] = tensor.empty(%[[TARGET_D0]], %[[TARGET_D1]])
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}outs(%[[INIT]]
// CHECK-DAG: %[[SIZE_D0:.+]] = tensor.dim %[[SOURCE]], %[[C0]]
// CHECK-DAG: %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
%init = tensor.empty(%d0) : tensor<42x?xf32>
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
- iterator_types = ["parallel", "parallel"]}
+ iterator_types = ["parallel", "parallel"]}
ins(%arg0 : tensor<?x42xf32>) outs(%init : tensor<42x?xf32>) {
^bb0(%arg4 : f32, %arg5 : f32):
%1 = arith.mulf %arg4, %arg4 : f32
iterator_types = ["parallel", "parallel"]}
ins(%lhs, %rhs : memref<2x2xf32>, memref<2x2xf32>)
outs(%sum : memref<2x2xf32>) {
- ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):
+ ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):
%0 = arith.addf %lhs_in, %rhs_in : f32
linalg.yield %0 : f32
}
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %arg1, %arg2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>, f32)
outs(%arg1 : tensor<?x?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
%2 = arith.addf %1, %arg5 : f32
linalg.yield %2 : f32
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1, %arg2 : tensor<?x?xf32>, tensor<?x?xf32>, f32)
outs(%arg0 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
%2 = arith.addf %1, %arg5 : f32
linalg.yield %2 : f32
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %cst : tensor<264x4xf32>, tensor<264x4xf32>)
outs(%0 : tensor<264x4xf32>) {
- ^bb0(%arg1: f32, %arg2: f32, %s: f32):
+ ^bb0(%arg1: f32, %arg2: f32, %s: f32):
%2 = arith.mulf %arg1, %arg2 : f32
linalg.yield %2 : f32
} -> tensor<264x4xf32>
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xi32>, tensor<?x?xi32>)
outs(%arg0 : tensor<?x?xi32>) {
- ^bb0(%arg3: i32, %arg4: i32, %s: i32):
+ ^bb0(%arg3: i32, %arg4: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%1 = arith.muli %arg3, %arg4 : i32
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0 : tensor<264x?xi32>)
outs(%shape : tensor<264x?x4xi32>) {
- ^bb0(%arg1: i32, %s: i32):
+ ^bb0(%arg1: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
%idx2 = linalg.index 2 : index
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg0 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %s: f32):
+ ^bb0(%arg3: f32, %arg4: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
linalg.yield %1 : f32
} -> tensor<?x?xf32>
iterator_types = ["parallel"]}
ins(%0, %arg1 : tensor<2xi64>, tensor<?xi64>)
outs(%1 : tensor<2xi64>) {
- ^bb0(%arg4: i64, %arg5: i64, %arg6: i64):
+ ^bb0(%arg4: i64, %arg5: i64, %arg6: i64):
%3 = arith.addi %arg4, %arg5 : i64
linalg.yield %3 : i64
} -> tensor<2xi64>
// CHECK: else
// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
-// CHECK: %[[CAST_SWAP_RESULT:.*]] = tensor.cast %[[SWAP_RESULT]] : tensor<?x?xf32> to tensor<15x?xf32>
+// CHECK: %[[CAST_SWAP_RESULT:.*]] = tensor.cast %[[SWAP_RESULT]] : tensor<?x?xf32> to tensor<15x?xf32>
// CHECK: tensor.insert_slice %[[CAST_SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]
%0 = linalg.matmul ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2: tensor<?x?xf32>)
-> tensor<?x?xf32>
-
+
return %0 : tensor<?x?xf32>
}
func.func @pad(%arg0: tensor<24x12xf32>,
%arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
- // expected-note @below {{when applied to this op}}
+ // expected-note @below {{when applied to this op}}
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
func.return %0 : tensor<24x25xf32>
}
return %0 : tensor<5x2xf32>
}
-// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d2, d0)>
-// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, d1, d2)>
-// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, d0, d2)>
-// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-// CHECK-DAG: #[[$MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK-LABEL: func @generic_split_3d
+// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d2, d0)>
+// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, d1, d2)>
+// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2, d3) -> (d3, d0, d2)>
+// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+// CHECK-DAG: #[[$MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+// CHECK-LABEL: func @generic_split_3d
// CHECK-DAG: %[[ID:.*]] = arith.constant 0x7F800000 : f32
-// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32>
-// CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32>
-// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
-// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
-// CHECK: arith.addf
+// CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32>
+// CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32>
+// CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
+// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
+// CHECK: arith.addf
// CHECK: arith.minf
-// CHECK: linalg.yield
-// CHECK: } -> tensor<5x2x4xf32>
-// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
+// CHECK: linalg.yield
+// CHECK: } -> tensor<5x2x4xf32>
+// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
+// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
// CHECK: arith.minf
-// CHECK: linalg.yield
-// CHECK: } -> tensor<5x2xf32>
-// CHECK: return %[[R]] : tensor<5x2xf32>
+// CHECK: linalg.yield
+// CHECK: } -> tensor<5x2xf32>
+// CHECK: return %[[R]] : tensor<5x2xf32>
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
func.func @vectorize_pad(
%arg0: tensor<24x12xf32>, %arg1: tensor<12x25xf32>,
%arg2: tensor<24x25xf32>, %arg3: index, %arg4: index,
- %arg5: index) -> tensor<24x25xf32> {
+ %arg5: index) -> tensor<24x25xf32> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
%0 = affine.min #map0()[%arg5]
%cc = complex.create %cf, %cf : complex<f32>
%3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] :
memref<?x?xcomplex<f32>, strided<[?, 1], offset: ?>> to memref<?x?xcomplex<f32>, strided<[?, ?], offset: ?>>
- linalg.fill ins(%cc : complex<f32>)
+ linalg.fill ins(%cc : complex<f32>)
outs(%3 : memref<?x?xcomplex<f32>, strided<[?, ?], offset: ?>>)
return
}
// CHECK: %[[INS:.*]] = tensor.insert_slice %[[PR]] into %[[ARG3]][0, 0] [%[[D3]], %[[D4]]] [1, 1] : tensor<?x?xf32> into tensor<5x?xf32>
// CHECK: scf.yield {{.*}} : tensor<5x?xf32>
// CHECK: }
-// CHECK: linalg.generic
+// CHECK: linalg.generic
// CHECK: return
// -----
// CHECK: %[[V_FILTER_0:.+]] = vector.extract %[[V_FILTER_R]][0] : vector<2x4xi8>
// CHECK: %[[V_FILTER_1:.+]] = vector.extract %[[V_FILTER_R]][1] : vector<2x4xi8>
-/// w == 0, kw =
+/// w == 0, kw =
// CHECK: %[[EXT_INPUT_0:.*]] = arith.extsi %[[V_INPUT_0]] : vector<3x2x4xi8> to vector<3x2x4xi32>
// CHECK: %[[B_FILTER_0:.*]] = vector.broadcast %[[V_FILTER_0]] : vector<4xi8> to vector<3x2x4xi8>
// CHECK: %[[EXT_FILTER_0:.*]] = arith.extsi %[[B_FILTER_0]] : vector<3x2x4xi8> to vector<3x2x4xi32>
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index
-
+
// CHECK: %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]]:2, %[[STRIDES:.*]]:2 = memref.extract_strided_metadata %[[ARG]]
%base_buffer, %offset, %sizes:2, %strides:2 = memref.extract_strided_metadata %base :
memref<5x4xf32, strided<[4,1], offset:2>>
// -----
func.func @fold_static_stride_subview_with_transfer_write_0d(
- %arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index,
+ %arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index,
%v : vector<f32>) {
%f1 = arith.constant 1.0 : f32
%0 = memref.subview %arg0[%arg1, %arg2][1, 1][2, %arg3] : memref<12x32xf32> to memref<f32, strided<[], offset: ?>>
memref<?x?xf32, strided<[1, 10], offset: 0>>
%r1 = memref.expand_shape %m1 [[0, 1], [2], [3, 4]] :
- memref<4x5x6xf32, strided<[1, ?, 1000], offset: 0>> into
+ memref<4x5x6xf32, strided<[1, ?, 1000], offset: 0>> into
memref<2x2x5x2x3xf32, strided<[2, 1, ?, 3000, 1000], offset: 0>>
%rr1 = memref.collapse_shape %r1 [[0, 1], [2], [3, 4]] :
memref<2x2x5x2x3xf32, strided<[2, 1, ?, 3000, 1000], offset: 0>> into
// -----
-func.func @extract_strided_metadata(%memref : memref<10x?xf32>)
+func.func @extract_strided_metadata(%memref : memref<10x?xf32>)
-> memref<?x?xf32, strided<[?, ?], offset: ?>> {
%base, %offset, %sizes:2, %strides:2 = memref.extract_strided_metadata %memref
: memref<16x4xf32> to memref<4x4xf32, strided<[8, 2], offset: ?>>
%12 = memref.alloc() : memref<1x9x1x4x1xf32, strided<[36, 36, 4, 4, 1]>>
- // CHECK: memref.subview
+ // CHECK: memref.subview
// CHECK-SAME: [1, 9, 1, 4, 1]
// CHECK-SAME: memref<1x9x1x4x1xf32, strided<[36, 36, 4, 4, 1]>> to memref<9x4xf32, strided<[?, ?], offset: ?>>
%13 = memref.subview %12[%arg1, %arg1, %arg1, %arg1, %arg1][1, 9, 1, 4, 1][%arg2, %arg2, %arg2, %arg2, %arg2] : memref<1x9x1x4x1xf32, strided<[36, 36, 4, 4, 1], offset: 0>> to memref<9x4xf32, strided<[?, ?], offset: ?>>
func.func @m16n8k16_fp16_vector_shape_a(%arg0: vector<4x4xf16>, %arg1: vector<2x2xf16>, %arg2: vector<2x2xf16>) -> vector<2x2xf16> {
// expected-error @+1 {{expected 256 warp-wide matrix A elements}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x4xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x4xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
return %d : vector<2x2xf16>
}
// -----
func.func @m16n8k16_fp16_vector_shape_b(%arg0: vector<4x2xf16>, %arg1: vector<2x4xf16>, %arg2: vector<2x2xf16>) -> vector<2x2xf16> {
// expected-error @+1 {{expected 128 warp-wide matrix B elements}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x4xf16>, vector<2x2xf16>) -> vector<2x2xf16>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x4xf16>, vector<2x2xf16>) -> vector<2x2xf16>
return %d : vector<2x2xf16>
}
// -----
func.func @m16n8k16_fp16_vector_shape_c(%arg0: vector<4x2xf16>, %arg1: vector<2x2xf16>, %arg2: vector<2x4xf16>) -> vector<2x4xf16> {
// expected-error @+1 {{expected 128 warp-wide matrix C elements}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x4xf16>) -> vector<2x4xf16>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x4xf16>) -> vector<2x4xf16>
return %d : vector<2x4xf16>
}
// -----
func.func @m16n8k16_fp16_vector_shape_a_extended(%arg0: vector<2x4xf16>, %arg1: vector<2x2xf16>, %arg2: vector<2x2xf16>) -> vector<2x2xf16> {
// expected-error @+1 {{expected matrix A to be shaped (4 x 2)}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<2x4xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<2x4xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
return %d : vector<2x2xf16>
}
// -----
func.func @m16n8k16_fp16_tf32Enabled(%arg0: vector<4x2xf16>, %arg1: vector<2x2xf16>, %arg2: vector<2x2xf16>) -> vector<2x2xf16> {
// expected-error @+1 {{expected tf32 tensor cores only for F32 operands}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16], tf32Enabled} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16], tf32Enabled} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
return %d : vector<2x2xf16>
}
// -----
func.func @m16n8k8_fp32_vector_shape_a(%arg0: vector<4x2xf32>, %arg1: vector<2x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
// expected-error @+1 {{expected 128 warp-wide matrix A elements}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<4x2xf32>, vector<2x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<4x2xf32>, vector<2x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
return %d : vector<2x2xf32>
}
// -----
func.func @m16n8k8_fp32_vector_shape_a_extended(%arg0: vector<1x4xf32>, %arg1: vector<2x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
// expected-error @+1 {{expected matrix A to be shaped (4 x 1)}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<1x4xf32>, vector<2x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<1x4xf32>, vector<2x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
return %d : vector<2x2xf32>
}
// -----
func.func @m8n8k4_fp64_vector_shape_a(%arg0: vector<1x2xf64>, %arg1: vector<1x1xf64>, %arg2: vector<1x2xf64>) -> vector<1x2xf64> {
// expected-error @+1 {{expected 32 warp-wide matrix A elements}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [8, 8, 4]} : (vector<1x2xf64>, vector<1x1xf64>, vector<1x2xf64>) -> vector<1x2xf64>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [8, 8, 4]} : (vector<1x2xf64>, vector<1x1xf64>, vector<1x2xf64>) -> vector<1x2xf64>
return %d : vector<1x2xf64>
}
// -----
func.func @m8n8k4_fp64_vector_shape_c_extended(%arg0: vector<1x1xf64>, %arg1: vector<1x1xf64>, %arg2: vector<2x1xf64>) -> vector<2x1xf64> {
// expected-error @+1 {{expected matrix C to be shaped (1 x 2)}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [8, 8, 4]} : (vector<1x1xf64>, vector<1x1xf64>, vector<2x1xf64>) -> vector<2x1xf64>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [8, 8, 4]} : (vector<1x1xf64>, vector<1x1xf64>, vector<2x1xf64>) -> vector<2x1xf64>
return %d : vector<2x1xf64>
}
// -----
// RUN: mlir-opt %s -test-nvgpu-mmasync-f32-to-tf32-patterns="precision=tf32" -split-input-file | FileCheck %s
// CHECK-LABEL: m16n8k4_tf32
-func.func @m16n8k4_tf32(%arg0: vector<2x1xf32>, %arg1: vector<1x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
+func.func @m16n8k4_tf32(%arg0: vector<2x1xf32>, %arg1: vector<1x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
// CHECK: nvgpu.mma.sync
// CHECK-SAME: tf32Enabled
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 4]} : (vector<2x1xf32>, vector<1x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 4]} : (vector<2x1xf32>, vector<1x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
return %d : vector<2x2xf32>
}
func.func @m16n8k8_tf32(%arg0: vector<4x1xf32>, %arg1: vector<2x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
// CHECK: nvgpu.mma.sync
// CHECK-SAME: tf32Enabled
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<4x1xf32>, vector<2x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<4x1xf32>, vector<2x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
return %d : vector<2x2xf32>
}
// -----
// RUN: mlir-opt %s -test-nvgpu-mmasync-f32-to-tf32-patterns="precision=tf32x3" -split-input-file | FileCheck %s
// CHECK-LABEL: m16n8k4_tf32
-func.func @m16n8k4_tf32(%arg0: vector<2x1xf32>, %arg1: vector<1x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
+func.func @m16n8k4_tf32(%arg0: vector<2x1xf32>, %arg1: vector<1x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
// expected-error @+1 {{TF32x3 is not supported at the moment for nvgpu.mma.sync on f32 datatype}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 4]} : (vector<2x1xf32>, vector<1x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 4]} : (vector<2x1xf32>, vector<1x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
return %d : vector<2x2xf32>
}
// CHECK-LABEL: m16n8k8_tf32
func.func @m16n8k8_tf32(%arg0: vector<4x1xf32>, %arg1: vector<2x1xf32>, %arg2: vector<2x2xf32>) -> vector<2x2xf32> {
// expected-error @+1 {{TF32x3 is not supported at the moment for nvgpu.mma.sync on f32 datatype}}
- %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<4x1xf32>, vector<2x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
+ %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<4x1xf32>, vector<2x1xf32>, vector<2x2xf32>) -> vector<2x2xf32>
return %d : vector<2x2xf32>
}
// -----
// CHECK: [[src_bits:%.+]] = arith.andi [[stRow]], [[c6]]
// CHECK: [[c2:%.+]] = arith.constant 2 : index
// CHECK: [[xorBits:%.+]] = arith.shli [[src_bits]], [[c2]]
- // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol]], [[xorBits]]
+ // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol]], [[xorBits]]
// CHECK: nvgpu.device_async_copy [[arg0]][[[ldRow]], [[ldCol]]], [[shm]][[[stRow]], [[stColPerm]]]
%0 = nvgpu.device_async_copy %arg0[%ldRow, %ldCol], %shm[%stRow, %stCol], 8
: memref<128x128xf16> to memref<128x32xf16, 3>
// CHECK: [[src_bits:%.+]] = arith.andi [[stRow]], [[c15]]
// CHECK: [[c3:%.+]] = arith.constant 3 : index
// CHECK: [[xorBits:%.+]] = arith.shli [[src_bits]], [[c3]]
- // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol]], [[xorBits]]
+ // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol]], [[xorBits]]
// CHECK: nvgpu.device_async_copy [[arg0]][[[ldRow]], [[ldCol]]], [[shmB]][[[stRow]], [[stColPerm]]]
%2 = nvgpu.device_async_copy %arg0[%ldRow, %ldCol], %shmB[%stRow, %stCol], 8
: memref<128x128xf16> to memref<32x128xf16, 3>
// CHECK: [[c3:%.+]] = arith.constant 3 : index
// CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c3]]
// CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol]], [[xorBits]]
- // CHECK: nvgpu.ldmatrix [[shmB]][[[fragRow]], [[fragColPerm]]]
+ // CHECK: nvgpu.ldmatrix [[shmB]][[[fragRow]], [[fragColPerm]]]
%matB = nvgpu.ldmatrix %shmB[%fragRow, %fragCol] {numTiles = 4 : i32, transpose = false}
: memref<32x128xf16, 3> -> vector<4x2xf16>
// CHECK: [[shmB:%.+]] = memref.alloc
%shm = memref.alloc() : memref<64x16xf32, 3>
%shmB = memref.alloc() : memref<16x64xf32, 3>
-
+
// CHECK: [[c6:%.+]] = arith.constant 6 : index
// CHECK: [[src_bits:%.+]] = arith.andi [[stRow]], [[c6]]
// CHECK: [[c1:%.+]] = arith.constant 1 : index
// CHECK: [[xorBits:%.+]] = arith.shli [[src_bits]], [[c1]]
- // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol]], [[xorBits]]
+ // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol]], [[xorBits]]
// CHECK: nvgpu.device_async_copy [[arg0]][[[ldRow]], [[ldCol]]], [[shm]][[[stRow]], [[stColPerm]]]
%0 = nvgpu.device_async_copy %arg0[%ldRow, %ldCol], %shm[%stRow, %stCol], 8
: memref<128x128xf32> to memref<64x16xf32, 3>
// CHECK: [[c1:%.+]] = arith.constant 1 : index
// CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c1]]
// CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol]], [[xorBits]]
- // CHECK: nvgpu.ldmatrix [[shm]][[[fragRow]], [[fragColPerm]]]
+ // CHECK: nvgpu.ldmatrix [[shm]][[[fragRow]], [[fragColPerm]]]
%mat = nvgpu.ldmatrix %shm[%fragRow, %fragCol] {numTiles = 4 : i32, transpose = false}
: memref<64x16xf32, 3> -> vector<4x1xf32>
// CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c1]]
// CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol]], [[xorBits]]
// CHECK: vector.store %{{.+}}, [[shm]][[[fragRow]], [[fragColPerm]]]
- vector.store %elem2, %shm[%fragRow, %fragCol] : memref<64x16xf32, 3>, vector<4xf32>
+ vector.store %elem2, %shm[%fragRow, %fragCol] : memref<64x16xf32, 3>, vector<4xf32>
// CHECK: [[c6:%.+]] = arith.constant 6 : index
// CHECK: [[srcBits:%.+]] = arith.andi [[fragRow]], [[c6]]
// CHECK: [[c2:%.+]] = arith.constant 2 : index
// CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]]
// CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol]], [[xorBits]]
- // CHECK: nvgpu.ldmatrix [[shmB]][[[fragRow]], [[fragColPerm]]]
+ // CHECK: nvgpu.ldmatrix [[shmB]][[[fragRow]], [[fragColPerm]]]
%matB = nvgpu.ldmatrix %shmB[%fragRow, %fragCol] {numTiles = 4 : i32, transpose = false}
: memref<16x64xf32, 3> -> vector<4x1xf32>
// CHECK: [[xorBits:%.+]] = arith.shli [[srcBits]], [[c2]]
// CHECK: [[fragColPerm:%.+]] = arith.xori [[fragCol]], [[xorBits]]
// CHECK: memref.load [[shmB]][[[fragRow]], [[fragColPerm]]]
- %elemB = memref.load %shmB[%fragRow, %fragCol] : memref<16x64xf32, 3>
+ %elemB = memref.load %shmB[%fragRow, %fragCol] : memref<16x64xf32, 3>
return %mat, %matB, %elem, %elem2, %elemB: vector<4x1xf32>, vector<4x1xf32>, f32, vector<4xf32>, f32
}
// CHECK: [[src_bits:%.+]] = arith.andi [[stRow]], [[c4]]
// CHECK: [[c1:%.+]] = arith.constant 1 : index
// CHECK: [[xorBits:%.+]] = arith.shrui [[src_bits]], [[c1]]
- // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol]], [[xorBits]]
+ // CHECK: [[stColPerm:%.+]] = arith.xori [[stCol]], [[xorBits]]
// CHECK: nvgpu.device_async_copy [[arg0]][[[ldRow]], [[ldCol]]], [[shm]][[[stRow]], [[stColPerm]]]
%0 = nvgpu.device_async_copy %arg0[%ldRow, %ldCol], %shm[%stRow, %stCol], 8
: memref<32x32xf64> to memref<32x4xf64, 3>
return
}
-func.func private @foo() -> ()
+func.func private @foo() -> ()
// CHECK: omp.parallel
// CHECK: func.call @foo() : () -> ()
%operand = pdl.operand
%resultType = pdl.type
%root = pdl.operation "baz.op"(%operand : !pdl.value) -> (%resultType : !pdl.type)
-
+
rewrite %root {
// expected-error @below {{expected operand to have element type '!pdl.value', but got '!pdl.type'}}
%range = pdl.range %operand, %resultType : !pdl.value, !pdl.type
// CHECK-GENERIC: "pdl.attribute"
// CHECK-GENERIC-NOT: value = loc
%attr = attribute loc("bar")
-
+
%root = operation {"attribute" = %attr}
rewrite %root with "rewriter"
}
// CHECK: memref.subview %{{.*}}[%{{.*}}, 0] [%[[C64]], 384] [1, 1] : memref<128x384xf32> to memref<?x384xf32, {{.*}}>
// CHECK: memref.subview %{{.*}}[%{{.*}}] [%[[C64]]] [1] : memref<128xf32> to memref<?xf32, {{.*}}>
- %11 = memref.subview %0[%9, 0] [%10, 384] [1, 1] :
+ %11 = memref.subview %0[%9, 0] [%10, 384] [1, 1] :
memref<128x384xf32> to memref<?x384xf32, affine_map<(d0, d1)[s0] -> (d0 * 384 + s0 + d1)>>
- %12 = memref.subview %2[%9] [%10] [1] :
+ %12 = memref.subview %2[%9] [%10] [1] :
memref<128xf32> to memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
// CHECK: linalg.generic {{.*}} ins(%{{.*}} : memref<?x384xf32, {{.*}}>) outs(%{{.*}} : memref<?xf32, {{.*}}>)
- linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d0)>],
- iterator_types = ["parallel", "reduction"]}
- ins(%11 : memref<?x384xf32, affine_map<(d0, d1)[s0] -> (d0 * 384 + s0 + d1)>>)
+ linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0)>],
+ iterator_types = ["parallel", "reduction"]}
+ ins(%11 : memref<?x384xf32, affine_map<(d0, d1)[s0] -> (d0 * 384 + s0 + d1)>>)
outs(%12 : memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>) {
^bb0(%arg1: f32, %arg2: f32):
%14 = arith.addf %arg1, %arg2 : f32
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// Prologue:
-// CHECK: %[[L0:.*]] = scf.execute_region
+// CHECK: %[[L0:.*]] = scf.execute_region
// CHECK-NEXT: memref.load %[[A]][%[[C0]]] : memref<?xf32>
// Kernel:
// CHECK: %[[L1:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C3]]
// CHECK-NEXT: %[[MUL2:.*]] = scf.execute_region
// arith.mulf %[[LR]]#2, %[[LR]]#0 : f32
// CHECK: memref.store %[[LR]]#3, %[[R]][%[[C7]]] : memref<?xf32>
-// CHECK-NEXT: %[[MUL3:.*]] = scf.execute_region
+// CHECK-NEXT: %[[MUL3:.*]] = scf.execute_region
/// %[[ADD3]], %[[LR]]#1 : f32
// CHECK: memref.store %[[MUL2]], %[[R]][%[[C8]]] : memref<?xf32>
// CHECK-NEXT: memref.store %[[MUL3]], %[[R]][%[[C9]]] : memref<?xf32>
%A_elem = scf.execute_region -> f32 {
%A_elem1 = memref.load %A[%i0] : memref<?xf32>
scf.yield %A_elem1 : f32
- } { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 }
+ } { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 }
%A1_elem = scf.execute_region -> f32 {
%inner = arith.addf %A_elem, %arg0 : f32
- scf.yield %inner : f32
+ scf.yield %inner : f32
} { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 }
%A2_elem = arith.mulf %cf, %A1_elem { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32
scf.yield %A2_elem : f32
// CHECK: %[[C1:.+]] = arith.constant 1 :
// CHECK: %[[APRO:.+]] = memref.alloc() :
// CHECK: %[[BPRO:.+]] = memref.alloc() :
-// CHECK: %[[ASV0:.+]] = memref.subview %[[ARG0]][%[[C0]]] [8] [1] :
-// CHECK: %[[BSV0:.+]] = memref.subview %[[ARG1]][%[[C0]]] [8] [1] :
+// CHECK: %[[ASV0:.+]] = memref.subview %[[ARG0]][%[[C0]]] [8] [1] :
+// CHECK: %[[BSV0:.+]] = memref.subview %[[ARG1]][%[[C0]]] [8] [1] :
// Prologue:
// CHECK: %[[PAV0:.+]] = memref.subview %[[APRO]][%[[C0]], 0] [1, 8] [1, 1] :
// CHECK: %[[PBV0:.+]] = memref.subview %[[BPRO]][%[[C0]], 0] [1, 8] [1, 1] :
-// CHECK: memref.copy %[[ASV0]], %[[PAV0]] :
-// CHECK: memref.copy %[[BSV0]], %[[PBV0]] :
+// CHECK: memref.copy %[[ASV0]], %[[PAV0]] :
+// CHECK: memref.copy %[[BSV0]], %[[PBV0]] :
// Kernel:
-// CHECK: %[[R:.+]]:2 = scf.for %[[IV:.+]] = %[[C0]] to %[[C3]] step %[[C1]]
+// CHECK: %[[R:.+]]:2 = scf.for %[[IV:.+]] = %[[C0]] to %[[C3]] step %[[C1]]
// CHECK-SAME: iter_args(%[[IA:.+]] = %[[PAV0]], %[[IB:.+]] = %[[PBV0:.+]])
// CHECK: %[[CV:.+]] = memref.subview %[[ARG2]]
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[IA]], %[[IB]], %{{.*}} : {{.*}}) outs(%[[CV]] :
-// CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]]
+// CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]]
// CHECK: %[[ASV:.+]] = memref.subview %[[ARG0]][%[[NEXT]]] [8] [1] :
// CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]] :
// CHECK: %[[BSV:.+]] = memref.subview %[[ARG1]][%[[NEXT]]] [8] [1] :
// CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]] :
// CHECK: %[[BUFIDX:.+]] = affine.apply
-// CHECK: %[[APROSV:.+]] = memref.subview %[[APRO]][%[[BUFIDX]], 0] [1, 8] [1, 1] :
-// CHECK: %[[BPROSV:.+]] = memref.subview %[[BPRO]][%[[BUFIDX]], 0] [1, 8] [1, 1] :
+// CHECK: %[[APROSV:.+]] = memref.subview %[[APRO]][%[[BUFIDX]], 0] [1, 8] [1, 1] :
+// CHECK: %[[BPROSV:.+]] = memref.subview %[[BPRO]][%[[BUFIDX]], 0] [1, 8] [1, 1] :
// CHECK: memref.copy %[[ASV]], %[[APROSV]] :
// CHECK: memref.copy %[[BSV]], %[[BPROSV]] :
// CHECK: scf.yield %[[APROSV]], %[[BPROSV]] :
// CHECK: }
// CHECK: %[[CV:.+]] = memref.subview %[[ARG2]][%[[C3]]] [8] [1] :
-// CHECK: linalg.generic
+// CHECK: linalg.generic
// CHECK-SAME: ins(%[[R]]#0, %[[R]]#1, %{{.*}} : {{.*}}) outs(%[[CV]] :
#map1,
#map1,
#map2,
- #map1
+ #map1
],
iterator_types = ["parallel"],
__test_pipelining_stage__ = 1,
%a_buf = memref.alloc() : memref<2x8xf32>
%b_buf = memref.alloc() : memref<2x8xf32>
scf.for %i0 = %c0 to %c4 step %c1 {
- %A_view = memref.subview %A[%i0][8][1] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 3 } : memref<?xf32> to memref<8xf32, #map>
+ %A_view = memref.subview %A[%i0][8][1] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 3 } : memref<?xf32> to memref<8xf32, #map>
%B_view = memref.subview %B[%i0][8][1] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 4 } : memref<?xf32> to memref<8xf32, #map>
%buf_idx = affine.apply affine_map<(d0)->(d0 mod 2)> (%i0)[] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 5 }
%a_buf_view = memref.subview %a_buf[%buf_idx,0][1,8][1,1] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 6 } : memref<2x8xf32> to memref<8xf32, #map>
outs(%C_view: memref<8xf32, #map>) {
^bb0(%a: f32, %b: f32, %s: f32, %c: f32):
%add = arith.addf %a, %b : f32
- %accum = arith.addf %add, %c : f32
+ %accum = arith.addf %add, %c : f32
%accum1 = arith.addf %scalar, %accum : f32
- %accum2 = arith.addf %s, %accum1 : f32
+ %accum2 = arith.addf %s, %accum1 : f32
linalg.yield %accum2 : f32
}
scf.yield
// CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
tensor.parallel_insert_slice %8 into %arg1[%arg0] [1] [1] : tensor<1xf32> into tensor<320xf32>
}
- }
+ }
return %4 : tensor<320xf32>
}
// CHECK-SAME: %[[arg1:.*]]: memref<?xf32, strided{{.*}}>,
// CHECK-SAME: %[[arg2:.*]]: memref<?xf32, strided{{.*}}>
func.func @parallel_insert_slice_with_conflict(
- %idx: index,
- %idx2: index,
+ %idx: index,
+ %idx2: index,
%arg1: tensor<?xf32> {bufferization.writable = true},
%arg2: tensor<?xf32> {bufferization.writable = true}) -> (f32, f32)
{
// -----
// CHECK-LABEL: func @non_tensor_for_arg
-func.func @non_tensor_for_arg(%A : tensor<?xf32> {bufferization.writable = true})
+func.func @non_tensor_for_arg(%A : tensor<?xf32> {bufferization.writable = true})
-> tensor<?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// -----
// This is a regression test. Just check that the IR bufferizes.
-
+
// CHECK-LABEL: func @buffer_type_of_collapse_shape
func.func @buffer_type_of_collapse_shape(%arg0: tensor<f64>) {
%true = arith.constant true
// -----
// This is a regression test. Just check that the IR bufferizes.
-
+
// CHECK-LABEL: func @non_block_argument_yield
func.func @non_block_argument_yield() {
- %true = arith.constant true
+ %true = arith.constant true
%0 = bufferization.alloc_tensor() : tensor<i32>
%1 = scf.while (%arg0 = %0) : (tensor<i32>) -> (tensor<i32>) {
scf.condition(%true) %arg0 : tensor<i32>
// CHECK: %cst_f64
%5 = spirv.Constant 0.5 : f64
- // CHECK: %cst_vec_3xi32
+ // CHECK: %cst_vec_3xi32
%6 = spirv.Constant dense<[1, 2, 3]> : vector<3xi32>
// CHECK: %cst
spirv.GlobalVariable @global_var : !spirv.ptr<f32, Input>
spirv.func @addressof() -> () "None" {
- // CHECK: %global_var_addr = spirv.mlir.addressof
+ // CHECK: %global_var_addr = spirv.mlir.addressof
%0 = spirv.mlir.addressof @global_var : !spirv.ptr<f32, Input>
spirv.Return
}
// -----
func.func @group_broadcast_negative_scope(%value: f32, %localid: vector<3xi32> ) -> f32 {
- // expected-error @+1 {{execution scope must be 'Workgroup' or 'Subgroup'}}
+ // expected-error @+1 {{execution scope must be 'Workgroup' or 'Subgroup'}}
%0 = spirv.GroupBroadcast <Device> %value, %localid : f32, vector<3xi32>
return %0: f32
}
// CHECK: } do {
// CHECK: ^bb0(%[[I2:.*]]: index):
// CHECK: scf.yield %[[I2]] : index
-// CHECK: }
+// CHECK: }
// CHECK: %[[M2:.*]] = memref.realloc %[[B]](%[[P2]])
// CHECK: scf.yield %[[M2]] : memref<?xf64>
// CHECK: } else {
}
// CHECK-LABEL: func @update_notinplace(
-// CHECK-SAME: %[[argb:.*]]: tensor<10xf32>
+// CHECK-SAME: %[[argb:.*]]: tensor<10xf32>
// CHECK-FUNC-LABEL: func @update_notinplace(
-// CHECK-FUNC-SAME: %[[argb:.*]]: tensor<10xf32>
+// CHECK-FUNC-SAME: %[[argb:.*]]: tensor<10xf32>
func.func @update_notinplace(%argb: tensor<10xf32>, %arga: tensor<10xf32, #SV>)
-> (tensor<10xf32>, tensor<10xf32>)
{
// CHECK: return
// CHECK: }
func.func @sparse_out( %arg0: tensor<10x20xf32, #CSR>, %arg1: !llvm.ptr<i8>) -> () {
- sparse_tensor.out %arg0, %arg1 : tensor<10x20xf32, #CSR>, !llvm.ptr<i8>
+ sparse_tensor.out %arg0, %arg1 : tensor<10x20xf32, #CSR>, !llvm.ptr<i8>
return
}
// CHECK-LABEL: func @sparse_tensor_foreach(
// CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64
-// CHECK: sparse_tensor.foreach in %[[A0]] :
+// CHECK: sparse_tensor.foreach in %[[A0]] :
// CHECK: ^bb0(%arg1: index, %arg2: index, %arg3: f64):
func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () {
sparse_tensor.foreach in %arg0 : tensor<2x4xf64, #DCSR> do {
#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
// CHECK-LABEL: func @sparse_tensor_foreach(
-// CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{{{.*}}}>>,
// CHECK-SAME: %[[A1:.*]]: f32
// CHECK-NEXT: %[[RET:.*]] = sparse_tensor.foreach in %[[A0]] init(%[[A1]])
// CHECK-NEXT: ^bb0(%[[TMP_1:.*]]: index, %[[TMP_2:.*]]: index, %[[TMP_v:.*]]: f64, %[[TMP_r:.*]]: f32)
func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () {
%ret = sparse_tensor.foreach in %arg0 init(%arg1): tensor<2x4xf64, #DCSR>, f32 -> f32
do {
- ^bb0(%1: index, %2: index, %v: f64, %r: f32) :
+ ^bb0(%1: index, %2: index, %v: f64, %r: f32) :
sparse_tensor.yield %r : f32
}
return
}
// CHECK-LABEL: func.func @mul_const_affine_dense_dim_2d(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<34x16xf64,
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<34x16xf64,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf64>) -> tensor<32x16xf64> {
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 19 : index
// CHECK: %[[VAL_28:.*]] = arith.mulf %[[VAL_27]], %[[VAL_21]] : f64
// CHECK: %[[VAL_29:.*]] = arith.addf %[[VAL_26]], %[[VAL_28]] : f64
// CHECK: memref.store %[[VAL_29]], %[[VAL_14]]{{\[}}%[[VAL_18]], %[[VAL_25]]] : memref<32x16xf64>
-// CHECK: }
-// CHECK: }
+// CHECK: }
+// CHECK: }
// CHECK: %[[VAL_30:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16xf64>
// CHECK: return %[[VAL_30]] : tensor<32x16xf64>
// CHECK: }
// CHECK-LABEL: func.func @concat_mix_dense_perm_dim1_dyn(
// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<3x2xf64>,
-// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr<i8>)
+// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr<i8>)
// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32
// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32
// CHECK-DAG: %[[init:.+]] = tensor.empty() : tensor<20x11xf32>
// CHECK-DAG: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[c20]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]])
// CHECK: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[iv]] into (%[[c3]], %[[c5]], %[[c7]]
-// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] :
-// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} :
-// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg]][%[[iv]], 0] [1, 11] [1, 1] :
+// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] :
+// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} :
+// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg]][%[[iv]], 0] [1, 11] [1, 1] :
// CHECK: scf.yield %[[update]] :
// CHECK: return %[[tile]]
// FOREACH-DAG: %[[init:.+]] = tensor.empty() : tensor<20x11xf32>
// FOREACH: %[[tile:.+]] = scf.foreach_thread (%[[iv:.+]]) in (%[[c20]]) shared_outs(%[[dest:.+]] = %[[init]])
// FOREACH: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[iv]] into (%[[c3]], %[[c5]], %[[c7]]
-// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] :
-// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} :
+// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] :
+// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} :
// FOREACH: perform_concurrently
// FOREACH-NEXT: tensor.parallel_insert_slice %[[sliceFlat]] into %[[dest]][%[[iv]], 0] [1, 11] [1, 1] :
// FOREACH: return %[[tile]]
// CHECK: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[c10]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]])
// CHECK: %[[inputIv:.+]] = affine.apply #[[$map0]](%[[iv]])
// CHECK: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[inputIv]] into (%[[c3]], %[[c5]], %[[c7]]
-// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 5] [1, 1, 1, 2] :
-// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} :
-// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg]][%[[iv]], 0] [1, 5] [1, 1] :
+// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 5] [1, 1, 1, 2] :
+// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} :
+// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg]][%[[iv]], 0] [1, 5] [1, 1] :
// CHECK: scf.yield %[[update]] :
// CHECK: return %[[tile]]
// CHECK-DAG: %[[c4:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[c11:.+]] = arith.constant 11 : index
// CHECK: %[[init:.+]] = tensor.empty(%[[sz1]], %[[sz2]]) : tensor<?x?xf32>
-// CHECK-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] :
-// CHECK-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] :
+// CHECK-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] :
+// CHECK-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] :
// CHECK-DAG: %[[d4:.+]] = tensor.dim %[[arg0]], %[[c4]] :
// CHECK: %[[tile1:.+]] = scf.for %[[iv1:.+]] = %[[c0]] to %[[sz1]] step %[[c1]] iter_args(%[[iterArg1:.+]] = %[[init]])
// CHECK: %[[tile2:.+]] = scf.for %[[iv2:.+]] = %[[c0]] to %[[sz2]] step %[[c1]] iter_args(%[[iterArg2:.+]] = %[[iterArg1]])
// CHECK: %[[multiIndex1:.+]]:3 = affine.delinearize_index %[[inputIv1]] into (%[[c3]], %[[d1]], %[[d2]]) :
// CHECK: %[[inputIv2:.+]] = affine.apply #[[map0:.+]](%[[iv2]])[%[[lb2]]]
// CHECK: %[[multiIndex2:.+]]:2 = affine.delinearize_index %[[inputIv2]] into (%[[c11]], %[[d4]]) :
-// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] :
-// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} :
-// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg2]][%[[iv1]], %[[iv2]]] [1, 1] [1, 1] :
+// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] :
+// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} :
+// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg2]][%[[iv1]], %[[iv2]]] [1, 1] [1, 1] :
// CHECK: scf.yield %[[update]] :
// CHECK: scf.yield %[[tile2]] :
-// CHECK: return %[[tile1]] :
+// CHECK: return %[[tile1]] :
// FOREACH: #[[map1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
// FOREACH: func.func @extract_slice_dynamic_multidim(%[[arg0:.+]]: tensor<3x?x?x11x?xf32>, %[[lb1:.+]]: index, %[[sz1:.+]]: index, %[[lb2:.+]]: index, %[[sz2:.+]]: index)
// FOREACH-DAG: %[[c4:.+]] = arith.constant 4 : index
// FOREACH-DAG: %[[c11:.+]] = arith.constant 11 : index
// FOREACH: %[[init:.+]] = tensor.empty(%[[sz1]], %[[sz2]]) : tensor<?x?xf32>
-// FOREACH-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] :
-// FOREACH-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] :
+// FOREACH-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] :
+// FOREACH-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] :
// FOREACH-DAG: %[[d4:.+]] = tensor.dim %[[arg0]], %[[c4]] :
// FOREACH: %[[tile1:.+]] = scf.foreach_thread (%[[tid1:.+]], %[[tid2:.+]]) in (%[[sz1]], %[[sz2]]) shared_outs(%[[dest:.+]] = %[[init]])
// FOREACH-DAG: %[[iv1:.+]] = affine.apply #[[map1]](%[[tid1]])[%[[lb1]]]
// FOREACH: %[[multiIndex1:.+]]:3 = affine.delinearize_index %[[iv1]] into (%[[c3]], %[[d1]], %[[d2]]) :
// FOREACH-DAG: %[[iv2:.+]] = affine.apply #[[map1]](%[[tid2]])[%[[lb2]]]
// FOREACH: %[[multiIndex2:.+]]:2 = affine.delinearize_index %[[iv2]] into (%[[c11]], %[[d4]]) :
-// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] :
-// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} :
+// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] :
+// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} :
// FOREACH: perform_concurrently
//FOREACH-NEXT: tensor.parallel_insert_slice %[[sliceFlat]] into %[[dest]][%[[tid1]], %[[tid2]]] [1, 1] [1, 1] :
// CHECK: @extract_slice_non_sliced_linearized_dim(%[[arg0:.+]]: tensor<{{.*}}>,
func.func @extract_slice_non_sliced_linearized_dim(%input: tensor<3x?x?x11x2xf32>, %offt: index, %size: index) -> tensor<?x22xf32> {
- %collapsed = tensor.collapse_shape %input [[0, 1, 2], [3, 4]] : tensor<3x?x?x11x2xf32> into tensor<?x22xf32>
+ %collapsed = tensor.collapse_shape %input [[0, 1, 2], [3, 4]] : tensor<3x?x?x11x2xf32> into tensor<?x22xf32>
%slice = tensor.extract_slice %collapsed [%offt, 0] [%size, 22] [1, 1] : tensor<?x22xf32> to tensor<?x22xf32>
// CHECK: scf.for
// CHECK-NOT: scf.for
func.func @no_sliced_linearized_dims(%input: tensor<30x11x100xf32>, %offt: index, %size: index) -> tensor<330x?xf32> {
%collapsed = tensor.collapse_shape %input [[0, 1], [2]] : tensor<30x11x100xf32> into tensor<330x100xf32>
%slice = tensor.extract_slice %collapsed [0, %offt] [330, %size] [1, 1] : tensor<330x100xf32> to tensor<330x?xf32>
- // CHECK-NOT: scf.for
+ // CHECK-NOT: scf.for
// CHECK: %[[init:.+]] = tensor.empty(%[[arg2]])
// CHECK: %[[e:.+]] = tensor.extract_slice %[[arg0]][0, 0, %[[arg1]]] [30, 11, %[[arg2]]] [1, 1, 1]
// CHECK: %[[c:.+]] = tensor.collapse_shape %[[e]] {{\[}}[0, 1], [2]]
// CHECK: %[[e:.+]] = tensor.extract_slice %[[arg0]][0, 0, 0] [1, 11, 100] [1, 1, 1]
// CHECK-SAME: tensor<1x11x100xf32> to tensor<11x100xf32>
// CHECK: %[[e1:.+]] = tensor.extract_slice %[[e]][%[[arg1]], 0] [%[[arg2]], 100] [1, 1]
- // CHECK-SAME: tensor<11x100xf32> to tensor<?x100xf32>
+ // CHECK-SAME: tensor<11x100xf32> to tensor<?x100xf32>
return %slice : tensor<?x100xf32>
}
%slice = tensor.extract_slice %collapsed [%offt, 0] [%size, 100] [1, 1] : tensor<?x100xf32> to tensor<?x100xf32>
// CHECK-NOT: scf.for
// CHECK: %[[c1:.+]] = arith.constant 1 : index
- // CHECK: %[[dim:.+]] = tensor.dim %[[arg0]], %[[c1]] :
+ // CHECK: %[[dim:.+]] = tensor.dim %[[arg0]], %[[c1]] :
// CHECK: %[[e:.+]] = tensor.extract_slice %[[arg0]][0, 0, 0, 0] [1, %[[dim]], 1, 100] [1, 1, 1, 1]
// CHECK-SAME: tensor<1x?x1x100xf32> to tensor<?x100xf32>
// CHECK: %[[e1:.+]] = tensor.extract_slice %[[e]][%[[arg1]], 0] [%[[arg2]], 100] [1, 1]
- // CHECK-SAME: tensor<?x100xf32> to tensor<?x100xf32>
+ // CHECK-SAME: tensor<?x100xf32> to tensor<?x100xf32>
return %slice : tensor<?x100xf32>
}
return %slice : tensor<?x?xf32>
}
-// Edge case where all collapsed dims are unit dims. This pattern can't eliminate the collapse shape,
+// Edge case where all collapsed dims are unit dims. This pattern can't eliminate the collapse shape,
// which should be handled by `linalg-fold-unit-extent-dims`.
// CHECK: @collapse_and_slice_multiple_all_unit_dim(%[[arg0:.+]]: tensor<{{.*}}>)
func.func @collapse_and_slice_multiple_all_unit_dim(%input: tensor<1x1x1x100xf32>) -> tensor<1x100xf32> {
%collapsed = tensor.collapse_shape %input [[0, 1, 2], [3]] : tensor<1x1x1x100xf32> into tensor<1x100xf32>
- %slice = tensor.extract_slice %collapsed [0, 0] [1, 100] [1, 1] : tensor<1x100xf32> to tensor<1x100xf32>
- return %slice : tensor<1x100xf32>
+ %slice = tensor.extract_slice %collapsed [0, 0] [1, 100] [1, 1] : tensor<1x100xf32> to tensor<1x100xf32>
+ return %slice : tensor<1x100xf32>
// CHECK: %[[collapse:.+]] = tensor.collapse_shape %[[arg0]] {{\[}}[0, 1, 2], [3]] : tensor<1x1x1x100xf32> into tensor<1x100xf32>
- // CHECK: return %[[collapse]]
+ // CHECK: return %[[collapse]]
}
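// Illustration only (hypothetical, not exercised by the checks above; the
// function name is made up): once the unit dims are folded, the same value can
// be produced by a single rank-reducing tensor.extract_slice, with no
// tensor.collapse_shape left behind.
func.func @collapse_all_unit_dim_sketch(%input: tensor<1x1x1x100xf32>) -> tensor<1x100xf32> {
  // Rank reduction drops two of the three unit dims directly.
  %slice = tensor.extract_slice %input[0, 0, 0, 0] [1, 1, 1, 100] [1, 1, 1, 1]
      : tensor<1x1x1x100xf32> to tensor<1x100xf32>
  return %slice : tensor<1x100xf32>
}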
// -----
func.func @scatter_empty_dims(
- %source : tensor<f32>,
+ %source : tensor<f32>,
%dest : tensor<4x5x6xf32>, %indices: tensor<1x2x3xindex>) {
// expected-error@+1 {{scatter_dims must be non-empty}}
%out = tensor.scatter %source into %dest[%indices] scatter_dims([]) unique:
// -----
func.func @scatter_coordinate_rank_overflow(
- %source : tensor<f32>,
+ %source : tensor<f32>,
%dest : tensor<4x5x6xf32>, %indices: tensor<1x2x3xindex>) {
// expected-error@+1 {{scatter_dims overflow dest rank}}
%out = tensor.scatter %source into %dest[%indices] scatter_dims([0, 1, 2, 3]) unique:
// -----
func.func @scatter_coordinate_negative(
- %source : tensor<f32>,
+ %source : tensor<f32>,
%dest : tensor<4x5x6xf32>, %indices: tensor<1x2x3xindex>) {
// expected-error@+1 {{scatter_dims value must be non-negative}}
%out = tensor.scatter %source into %dest[%indices] scatter_dims([-1]) unique:
// -----
func.func @scatter_coordinate_overflow(
- %source : tensor<f32>,
+ %source : tensor<f32>,
%dest : tensor<4x5x6xf32>, %indices: tensor<1x2x3xindex>) {
// expected-error@+1 {{scatter_dims value must be smaller than dest rank}}
%out = tensor.scatter %source into %dest[%indices] scatter_dims([42]) unique:
// -----
func.func @scatter_coordinate_overflow(
- %source : tensor<f32>,
+ %source : tensor<f32>,
%dest : tensor<4x5x6xf32>, %indices: tensor<1x2x3xindex>) {
// expected-error@+1 {{scatter_dims values must be strictly increasing}}
%out = tensor.scatter %source into %dest[%indices] scatter_dims([1, 0]) unique:
// -----
func.func @scatter_missing_unique(
- %source : tensor<f32>,
+ %source : tensor<f32>,
%dest : tensor<4x5x6xf32>, %indices: tensor<1x2x3xindex>) {
// expected-error@+1 {{requires 'unique' attribute to be set}}
%out = tensor.scatter %source into %dest[%indices] scatter_dims([0, 2]):
// -----
func.func @scatter_wrong_result_type(
- %source : tensor<f32>,
+ %source : tensor<f32>,
%dest : tensor<4x5x6xf32>, %indices: tensor<1x2x3xindex>) {
// expected-error@+1 {{source type mismatch: expected 'tensor<1x2x1x5x1xf32>' or its rank-reduced variant 'tensor<1x2x5xf32>' (got: 'tensor<f32>')}}
%out = tensor.scatter %source into %dest[%indices] scatter_dims([0, 2]) unique:
func.func @test_splat_op(%s : f32) {
// CHECK: tensor.splat [[S]] : tensor<8xf32>
%v = tensor.splat %s : tensor<8xf32>
-
+
// CHECK: tensor.splat [[S]] : tensor<4xf32>
%u = "tensor.splat"(%s) : (f32) -> tensor<4xf32>
return
(tensor<4x5x6xf32>, tensor<1x3x2xindex>) -> tensor<1x3x4xf32>
// CHECK: %{{.*}} = tensor.scatter %[[GATHER]] into %[[ARG0]][%[[ARG1]]] scatter_dims([1, 2]) unique : (tensor<1x3x4x1x1xf32>, tensor<4x5x6xf32>, tensor<1x3x2xindex>) -> tensor<4x5x6xf32>
- %scattered = tensor.scatter %gathered into %dest[%indices]
+ %scattered = tensor.scatter %gathered into %dest[%indices]
scatter_dims([1, 2]) unique:
(tensor<1x3x4x1x1xf32>, tensor<4x5x6xf32>, tensor<1x3x2xindex>) -> tensor<4x5x6xf32>
// CHECK: %{{.*}} = tensor.scatter %[[GATHER0]] into %[[ARG0]][%[[ARG2]]] scatter_dims([1, 2]) unique : (tensor<1x3x4xf32>, tensor<4x5x6xf32>, tensor<1x3x2xi32>) -> tensor<4x5x6xf32>
- %rank_reduced_scattered = tensor.scatter %rank_reduced_gathered into %dest[%indices_i32]
+ %rank_reduced_scattered = tensor.scatter %rank_reduced_gathered into %dest[%indices_i32]
scatter_dims([1, 2]) unique:
(tensor<1x3x4xf32>, tensor<4x5x6xf32>, tensor<1x3x2xi32>) -> tensor<4x5x6xf32>
return
// CHECK-NOT: @sub
func.func @inlined_if_fn(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<i1>) -> tensor<f32> {
%0 = "tosa.cond_if"(%arg2, %arg0, %arg1) ({
- ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):
+ ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):
%1 = call @add(%arg3, %arg4) : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tosa.yield"(%1) : (tensor<f32>) -> ()
}, {
- ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):
+ ^bb0(%arg3: tensor<f32>, %arg4: tensor<f32>):
%1 = call @sub(%arg3, %arg4) : (tensor<f32>, tensor<f32>) -> tensor<f32>
"tosa.yield"(%1) : (tensor<f32>) -> ()
}) : (tensor<i1>, tensor<f32>, tensor<f32>) -> tensor<f32>
// Check that calls are inlined and functions eliminated:
// CHECK-NOT: @while
%1:4 = "tosa.while_loop"(%arg0, %arg1, %arg2, %arg3) ({
- ^bb0(%arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<i32>, %arg7: tensor<10xi32>):
+ ^bb0(%arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<i32>, %arg7: tensor<10xi32>):
%2 = call @while_cond_40(%arg4, %arg5, %arg6, %arg7) : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>) -> tensor<i1>
"tosa.yield"(%2) : (tensor<i1>) -> ()
}, {
- ^bb0(%arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<i32>, %arg7: tensor<10xi32>):
+ ^bb0(%arg4: tensor<i32>, %arg5: tensor<i32>, %arg6: tensor<i32>, %arg7: tensor<10xi32>):
%2:4 = call @while_body_50(%arg4, %arg5, %arg6, %arg7) : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>) -> (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>)
"tosa.yield"(%2#0, %2#1, %2#2, %2#3) : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>) -> ()
}) : (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>) -> (tensor<i32>, tensor<i32>, tensor<i32>, tensor<10xi32>)
transform.sequence %0 : !pdl.operation failures(propagate) attributes { ord = 3 } {
^bb3(%arg3: !pdl.operation):
}
-
+
// `transform.sequence` has recursive side effects, so it has the same "free" effect
// as the child op it contains.
// expected-note @below {{freed here}}
transform.sequence %0 : !pdl.operation failures(propagate) attributes { ord = 3 } {
^bb3(%arg3: !pdl.operation):
}
-
+
// expected-note @below {{freed here}}
test_consume_operand_if_matches_param_or_fail %0[42]
// expected-warning @below {{operand #0 may be used after free}}
func.func @get_parent_for_op_no_loop(%arg0: index, %arg1: index) {
// expected-remark @below {{found muli}}
- %0 = arith.muli %arg0, %arg1 : index
- arith.addi %0, %arg1 : index
+ %0 = arith.muli %arg0, %arg1 : index
+ arith.addi %0, %arg1 : index
return
}
func.func @get_parent_for_op_no_loop(%arg0: index, %arg1: index) {
// expected-note @below {{target op}}
- %0 = arith.muli %arg0, %arg1 : index
+ %0 = arith.muli %arg0, %arg1 : index
return
}
// -----
func.func @split_handles(%a: index, %b: index, %c: index) {
- %0 = arith.muli %a, %b : index
- %1 = arith.muli %a, %c : index
+ %0 = arith.muli %a, %b : index
+ %1 = arith.muli %a, %c : index
return
}
// -----
func.func @split_handles(%a: index, %b: index, %c: index) {
- %0 = arith.muli %a, %b : index
- %1 = arith.muli %a, %c : index
+ %0 = arith.muli %a, %b : index
+ %1 = arith.muli %a, %c : index
return
}
// -----
func.func @split_handles(%a: index, %b: index, %c: index) {
- %0 = arith.muli %a, %b : index
- %1 = arith.muli %a, %c : index
+ %0 = arith.muli %a, %b : index
+ %1 = arith.muli %a, %c : index
return
}
%arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>) -> vector<3x2xi8> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0 : i8
- %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
+ %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>, vector<3x2xi8>
return %v : vector<3x2xi8>
}
// CHECK-LABEL: func @transfer_read_rank_reducing
// CHECK-SAME: %[[ARG:.+]]: memref<1x1x3x2xi8
-// CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0] [1, 1, 3, 2] [1, 1, 1, 1]
+// CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0] [1, 1, 3, 2] [1, 1, 1, 1]
// CHECK-SAME: memref<1x1x3x2xi8, {{.*}}> to memref<3x2xi8, {{.*}}>
// CHECK: vector.transfer_read %[[SUBVIEW]]
func.func @transfer_write_rank_reducing(%arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>, %vec : vector<3x2xi8>) {
%c0 = arith.constant 0 : index
- vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] :
+ vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] :
vector<3x2xi8>, memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>
return
}
// CHECK-LABEL: func @transfer_write_rank_reducing
// CHECK-SAME: %[[ARG:.+]]: memref<1x1x3x2xi8
-// CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0] [1, 1, 3, 2] [1, 1, 1, 1]
+// CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0] [1, 1, 3, 2] [1, 1, 1, 1]
// CHECK-SAME: memref<1x1x3x2xi8, {{.*}}> to memref<3x2xi8, {{.*}}>
// CHECK: vector.transfer_write %{{.*}}, %[[SUBVIEW]]
\ No newline at end of file
%arg : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> vector<5x4x3x2xi8> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0 : i8
- %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
+ %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, vector<5x4x3x2xi8>
return %v : vector<5x4x3x2xi8>
}
func.func @transfer_write_flattenable_with_offset(
%arg : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, %vec : vector<5x4x3x2xi8>) {
%c0 = arith.constant 0 : index
- vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] :
+ vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] :
vector<5x4x3x2xi8>, memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>
return
}
%arg : memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> vector<2x2x2x2xi8> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0 : i8
- %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
+ %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
memref<5x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>, vector<2x2x2x2xi8>
return %v : vector<2x2x2x2xi8>
}
%arg : memref<5x4x3x2xi8, strided<[24, 8, 2, 1], offset: ?>>) -> vector<5x4x3x2xi8> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0 : i8
- %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
+ %v = vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
memref<5x4x3x2xi8, strided<[24, 8, 2, 1], offset: ?>>, vector<5x4x3x2xi8>
return %v : vector<5x4x3x2xi8>
}
// CHECK: return
func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
%v0 : vector<1x4xf32>, %v1 : vector<1x1xf32>, %v2 : vector<1x4xf32>, %i : index) -> vector<1x4xf32> {
- %alias = memref.subview %arg1[0, 0] [2, 2] [1, 1] :
+ %alias = memref.subview %arg1[0, 0] [2, 2] [1, 1] :
memref<4x4xf32> to memref<2x2xf32, strided<[4, 1]>>
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
vector<1x4xf32>, memref<4x4xf32>
// blocking write.
vector.transfer_write %v1, %alias[%c1, %c0] {in_bounds = [true, true]} :
- vector<1x1xf32>, memref<2x2xf32, strided<[4, 1]>>
+ vector<1x1xf32>, memref<2x2xf32, strided<[4, 1]>>
%0 = vector.transfer_read %arg1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
- memref<4x4xf32>, vector<1x4xf32>
+ memref<4x4xf32>, vector<1x4xf32>
vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
vector<1x4xf32>, memref<4x4xf32>
return %0 : vector<1x4xf32>
// CHECK-LABEL: func @vector_contract_f32
// CHECK-SAME: [[arg0:%.+]]: vector<8x4xf32>, [[arg1:%.+]]: vector<8x4xf32>, [[arg2:%.+]]: vector<8x8xf32>
-// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// CHECK-SAME: offsets = [0, 0]
-// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// CHECK-SAME: offsets = [0, 0]
-// CHECK: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
+// CHECK: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
// CHECK-SAME: offsets = [0, 0]
// CHECK: [[accum1:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[c]]
// CHECK-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// CHECK-SAME: offsets = [0, 2]
-// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// CHECK-SAME: offsets = [0, 2]
// CHECK: [[accum2:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[accum1]]
// CHECK-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// CHECK-SAME: offsets = [0, 0]
-// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// CHECK-SAME: offsets = [4, 0]
-// CHECK: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
+// CHECK: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
// CHECK-SAME: offsets = [0, 4]
// CHECK: [[accum3:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[c]]
// CHECK-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// CHECK-SAME: offsets = [0, 2]
-// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// CHECK-SAME: offsets = [4, 2]
// CHECK: [[accum4:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[accum3]]
// CHECK-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// CHECK-SAME: offsets = [4, 0]
-// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// CHECK-SAME: offsets = [0, 0]
-// CHECK: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
+// CHECK: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
// CHECK-SAME: offsets = [4, 0]
// CHECK: [[accum5:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[c]]
// CHECK-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// CHECK-SAME: offsets = [4, 2]
-// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// CHECK-SAME: offsets = [0, 2]
// CHECK: [[accum6:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[accum5]]
// CHECK-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// CHECK-SAME: offsets = [4, 0]
-// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// CHECK-SAME: offsets = [4, 0]
-// CHECK: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
+// CHECK: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
// CHECK-SAME: offsets = [4, 4]
// CHECK: [[accum7:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[c]]
// CHECK-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// CHECK: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// CHECK-SAME: offsets = [4, 2]
-// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// CHECK: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// CHECK-SAME: offsets = [4, 2]
// CHECK: [[accum8:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[accum7]]
// CHECK-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
// ORDER-LABEL: func @vector_contract_f32
// ORDER-SAME: [[arg0:%.+]]: vector<8x4xf32>, [[arg1:%.+]]: vector<8x4xf32>, [[arg2:%.+]]: vector<8x8xf32>
-// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// ORDER-SAME: offsets = [0, 0]
-// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// ORDER-SAME: offsets = [0, 0]
-// ORDER: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
+// ORDER: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
// ORDER-SAME: offsets = [0, 0]
// ORDER: [[accum1:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[c]]
// ORDER-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// ORDER-SAME: offsets = [0, 0]
-// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// ORDER-SAME: offsets = [4, 0]
-// ORDER: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
+// ORDER: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
// ORDER-SAME: offsets = [0, 4]
// ORDER: [[accum2:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[c]]
// ORDER-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// ORDER-SAME: offsets = [4, 0]
-// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// ORDER-SAME: offsets = [0, 0]
-// ORDER: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
+// ORDER: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
// ORDER-SAME: offsets = [4, 0]
// ORDER: [[accum3:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[c]]
// ORDER-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// ORDER-SAME: offsets = [4, 0]
-// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// ORDER-SAME: offsets = [4, 0]
-// ORDER: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
+// ORDER: [[c:%.+]] = vector.extract_strided_slice [[arg2]]
// ORDER-SAME: offsets = [4, 4]
// ORDER: [[accum4:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[c]]
// ORDER-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// ORDER-SAME: offsets = [0, 2]
-// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// ORDER-SAME: offsets = [0, 2]
// ORDER: [[accum5:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[accum1]]
// ORDER-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// ORDER-SAME: offsets = [0, 2]
-// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// ORDER-SAME: offsets = [4, 2]
// ORDER: [[accum6:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[accum2]]
// ORDER-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// ORDER-SAME: offsets = [4, 2]
-// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// ORDER-SAME: offsets = [0, 2]
// ORDER: [[accum7:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[accum3]]
// ORDER-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
-// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
+// ORDER: [[a:%.+]] = vector.extract_strided_slice [[arg0]]
// ORDER-SAME: offsets = [4, 2]
-// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
+// ORDER: [[b:%.+]] = vector.extract_strided_slice [[arg1]]
// ORDER-SAME: offsets = [4, 2]
// ORDER: [[accum8:%.+]] = vector.contract {{{.*}}} [[a]], [[b]], [[accum4]]
// ORDER-SAME: vector<4x2xf32>, vector<4x2xf32> into vector<4x4xf32>
// -----
-func.func @vector_contract_batched(%lhs: vector<8x8x4xf32>, %rhs: vector<8x8x4xf32>, %init: vector<8x8x8xf32>) -> vector<8x8x8xf32> {
+func.func @vector_contract_batched(%lhs: vector<8x8x4xf32>, %rhs: vector<8x8x4xf32>, %init: vector<8x8x8xf32>) -> vector<8x8x8xf32> {
%0 = vector.contract
{indexing_maps = [affine_map<(d0,d1,d2,c0) -> (d0,d1,c0)>,
affine_map<(d0,d1,d2,c0) -> (d0,d2,c0)>,
// CHECK-LABEL: vector_contract_batched
// CHECK-COUNT-16: vector.contract
// CHECK-NOT: vector.contract
-// CHECK: return
+// CHECK: return
// UNROLL-LABEL: vector_contract_batched
// UNROLL-COUNT-1: vector.contract
// -----
// CHECK-PROP: func @dedup
-func.func @dedup(%laneid: index, %v0: vector<4xf32>, %v1: vector<4xf32>)
+func.func @dedup(%laneid: index, %v0: vector<4xf32>, %v1: vector<4xf32>)
-> (vector<1xf32>, vector<1xf32>) {
// CHECK-PROP: %[[SINGLE_RES:.*]] = vector.warp_execute_on_lane_0{{.*}} -> (vector<1xf32>) {
// -----
// CHECK-SCF-IF: func @warp_execute_has_broadcast_semantics
-func.func @warp_execute_has_broadcast_semantics(%laneid: index, %s0: f32, %v0: vector<f32>, %v1: vector<1xf32>, %v2: vector<1x1xf32>)
+func.func @warp_execute_has_broadcast_semantics(%laneid: index, %s0: f32, %v0: vector<f32>, %v1: vector<1xf32>, %v2: vector<1x1xf32>)
-> (f32, vector<f32>, vector<1xf32>, vector<1x1xf32>) {
// CHECK-SCF-IF-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-SCF-IF: func @warp_execute_nd_distribute
// CHECK-SCF-IF-SAME: (%[[LANEID:.*]]: index
-func.func @warp_execute_nd_distribute(%laneid: index, %v0: vector<1x64x1xf32>, %v1: vector<1x2x128xf32>)
+func.func @warp_execute_nd_distribute(%laneid: index, %v0: vector<1x64x1xf32>, %v1: vector<1x2x128xf32>)
-> (vector<1x64x1xf32>, vector<1x2x128xf32>) {
// CHECK-SCF-IF-DAG: %[[C0:.*]] = arith.constant 0 : index
#hello_world = affine_map<(i, j) [s0] -> (((s0 + (i + j) + 5), j)> // expected-error {{expected ')'}}
// -----
-
+
// expected-error @+1 {{expected '(' in affine map range}}
#hello_world = affine_map<(i, j) [s0] -> i + s0, j)>
func.func @location_fused_missing_metadata() {
^bb:
// expected-error@+1 {{expected attribute value}}
- return loc(fused<)
+ return loc(fused<)
}
// -----
// expected at the end of foo, not on the return line.
func.func @error_at_end_of_line() {
// expected-error@+1 {{expected ':' followed by operation type}}
- %0 = "foo"()
+ %0 = "foo"()
return
}
// This makes sure we emit an error at the end of the correct line: the : is
// expected at the end of foo, not on the return line.
func.func @error_at_end_of_line() {
- %0 = "foo"()
+ %0 = "foo"()
// expected-error@-1 {{expected ':' followed by operation type}}
// This is a comment and so is the thing above.
// CHECK-NEXT: // id: %[[ARG0U3]]
"test.no_result"(%arg0) {} : (i32) -> ()
// Check for unused result.
- // CHECK-NEXT: %[[ARG0U2]] =
+ // CHECK-NEXT: %[[ARG0U2]] =
// CHECK-SAME: // unused
%1 = "test.unused_result"(%arg0, %arg1) {} : (i32, i32) -> i32
// Check that both users are printed.
- // CHECK-NEXT: %[[ARG0U1]] =
+ // CHECK-NEXT: %[[ARG0U1]] =
// CHECK-SAME: // users: %[[A:.+]]#0, %[[A]]#1
%2 = "test.one_result"(%arg0, %arg1) {} : (i32, i32) -> i32
// For multiple results, users should be grouped per result.
- // CHECK-NEXT: %[[A]]:2 =
+ // CHECK-NEXT: %[[A]]:2 =
// CHECK-SAME: // users: (%[[B:.+]], %[[C:.+]]), (%[[B]], %[[D:.+]])
%3:2 = "test.many_results"(%2) {} : (i32) -> (i32, i32)
// Two results are produced, but there is only one user.
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(test-clone))" -split-input-file
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(test-clone))" -split-input-file
module {
func.func @fixpoint(%arg1 : i32) -> i32 {
func.func @foo() {
%0 = "test.foo"() : () -> i32
cf.br ^header
-
+
^header:
cf.br ^body
%1 = sparse_tensor.values %A : tensor<4x4xi8, #DCSR> to memref<?xi8>
%2 = vector.transfer_read %1[%c0], %du: memref<?xi8>, vector<16xi8>
vector.print %2 : vector<16xi8>
-
+
return
}
}
return
}
-
+
//
// Main driver.
//
[[ 1.0, 2.0],
[ 5.0, 6.0]]
> : tensor<2x2xf64>
-
+
%src3d = arith.constant sparse<
- [[1, 2, 3], [4, 5, 6]], [1.0, 2.0]
+ [[1, 2, 3], [4, 5, 6]], [1.0, 2.0]
> : tensor<7x8x9xf64>
-
+
//
// Convert dense tensor directly to various sparse tensors.
//
// CHECK-NEXT: 5
// CHECK-NEXT: 1
// CHECK-NEXT: 1
- // CHECK-NEXT: 6
+ // CHECK-NEXT: 6
call @foreach_print_dense(%src) : (tensor<2x2xf64>) -> ()
// CHECK-NEXT: 0
// CHECK-NEXT: 0
// CHECK-NEXT: 6
// CHECK-NEXT: 2
call @foreach_print_3d(%s6): (tensor<7x8x9xf64, #CCCPerm>) -> ()
-
+
bufferization.dealloc_tensor %s1 : tensor<2x2xf64, #Row>
bufferization.dealloc_tensor %s2 : tensor<2x2xf64, #CSR>
bufferization.dealloc_tensor %s3 : tensor<2x2xf64, #DCSC>
bufferization.dealloc_tensor %in1D_nwc : tensor<?x?x?xf32>
bufferization.dealloc_tensor %filter1D_nwc : tensor<?x?x?xf32>
bufferization.dealloc_tensor %out1D_nwc : tensor<?x?x?xf32>
-
+
bufferization.dealloc_tensor %in1D_nwc_CDC : tensor<?x?x?xf32, #CDC>
bufferization.dealloc_tensor %filter1D_nwc_CDC : tensor<?x?x?xf32, #CDC>
bufferization.dealloc_tensor %in1D_nwc_CCC : tensor<?x?x?xf32, #CCC>
bufferization.dealloc_tensor %CCC_ret : tensor<?x?x?xf32, #CCC>
bufferization.dealloc_tensor %CDC_ret : tensor<?x?x?xf32, #CDC>
-
+
return
}
func.func @conv2d_sparse_out(%input: tensor<8x8xi32>,
%filter: tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> {
- %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
+ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
%0 = linalg.conv_2d
ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
func.func @conv2d_all_sparse_DCSR(%input: tensor<8x8xi32, #DCSR>,
%filter: tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> {
- %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
+ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
%0 = linalg.conv_2d
ins (%input, %filter: tensor<8x8xi32, #DCSR>, tensor<3x3xi32, #DCSR>)
outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
func.func @conv2d_all_sparse_CSR(%input: tensor<8x8xi32, #CSR>,
%filter: tensor<3x3xi32, #CSR>) -> tensor<6x6xi32, #CSR> {
- %s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSR>
+ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSR>
%0 = linalg.conv_2d
ins (%input, %filter: tensor<8x8xi32, #CSR>, tensor<3x3xi32, #CSR>)
outs (%s: tensor<6x6xi32, #CSR>) -> tensor<6x6xi32, #CSR>
return %0 : tensor<6x6xi32, #CSR>
}
-
+
func.func @conv2d_all_sparse_CSC(%input: tensor<8x8xi32, #CSC>,
%filter: tensor<3x3xi32, #CSC>) -> tensor<6x6xi32, #CSC> {
- %s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSC>
+ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSC>
%0 = linalg.conv_2d
ins (%input, %filter: tensor<8x8xi32, #CSC>, tensor<3x3xi32, #CSC>)
outs (%s: tensor<6x6xi32, #CSC>) -> tensor<6x6xi32, #CSC>
: tensor<8x8xi32> to tensor<8x8xi32, #CSR>
%sparse_input_CSC = sparse_tensor.convert %input
: tensor<8x8xi32> to tensor<8x8xi32, #CSC>
-
+
// Call the kernel.
%output = arith.constant dense<0> : tensor<6x6xi32>
%0 = call @conv2d(%input, %sparse_filter_DCSR, %output)
%4 = call @conv2d_all_sparse_CSC(%sparse_input_CSC, %sparse_filter_CSC)
: (tensor<8x8xi32, #CSC>,
tensor<3x3xi32, #CSC>) -> tensor<6x6xi32, #CSC>
-
-
+
+
// Verify the output.
//
// CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
%v1 = vector.transfer_read %sparse_ret[%c0, %c0], %i0
: tensor<6x6xi32>, vector<6x6xi32>
vector.print %v1 : vector<6x6xi32>
-
+
//
// Should be the same as dense output
// CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
%v2 = vector.transfer_read %all_sparse_DCSR[%c0, %c0], %i0
: tensor<6x6xi32>, vector<6x6xi32>
vector.print %v2 : vector<6x6xi32>
-
+
//
// Should be the same as dense output
// CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
%v3 = vector.transfer_read %all_sparse_CSR[%c0, %c0], %i0
: tensor<6x6xi32>, vector<6x6xi32>
vector.print %v3 : vector<6x6xi32>
-
+
//
// Should be the same as dense output
// CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
%v4 = vector.transfer_read %all_sparse_CSC[%c0, %c0], %i0
: tensor<6x6xi32>, vector<6x6xi32>
vector.print %v4 : vector<6x6xi32>
-
+
// Release the resources.
bufferization.dealloc_tensor %sparse_filter_DCSR : tensor<3x3xi32, #DCSR>
bufferization.dealloc_tensor %sparse_filter_CSR : tensor<3x3xi32, #CSR>
bufferization.dealloc_tensor %sparse_filter_CSC : tensor<3x3xi32, #CSC>
-
+
bufferization.dealloc_tensor %sparse_input_DCSR : tensor<8x8xi32, #DCSR>
bufferization.dealloc_tensor %sparse_input_CSR : tensor<8x8xi32, #CSR>
bufferization.dealloc_tensor %sparse_input_CSC : tensor<8x8xi32, #CSC>
%dense_ret = call @conv_2d_nhwc_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
%CCCC_ret = call @conv_2d_nhwc_hwcf_CCCC(%in2D_nhwc_CCCC, %filter2D_nhwc_CCCC) : (tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32, #CCCC>) -> (tensor<?x?x?x?xf32, #CCCC>)
%CDCD_ret = call @conv_2d_nhwc_hwcf_CDCD(%in2D_nhwc_CDCD, %filter2D_nhwc_CDCD) : (tensor<?x?x?x?xf32, #CDCD>, tensor<?x?x?x?xf32, #CDCD>) -> (tensor<?x?x?x?xf32, #CDCD>)
-
+
// CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
bufferization.dealloc_tensor %in2D_nhwc : tensor<?x?x?x?xf32>
bufferization.dealloc_tensor %filter2D_nhwc : tensor<?x?x?x?xf32>
bufferization.dealloc_tensor %out2D_nhwc : tensor<?x?x?x?xf32>
-
+
bufferization.dealloc_tensor %in2D_nhwc_CDCD : tensor<?x?x?x?xf32, #CDCD>
bufferization.dealloc_tensor %filter2D_nhwc_CDCD : tensor<?x?x?x?xf32, #CDCD>
bufferization.dealloc_tensor %in2D_nhwc_CCCC : tensor<?x?x?x?xf32, #CCCC>
bufferization.dealloc_tensor %CCCC_ret : tensor<?x?x?x?xf32, #CCCC>
bufferization.dealloc_tensor %CDCD_ret : tensor<?x?x?x?xf32, #CDCD>
-
+
return
}
%v2 = vector.transfer_read %2[%c0, %c0, %c0], %zero
: tensor<?x?x?xf32>, vector<6x6x6xf32>
vector.print %v2 : vector<6x6x6xf32>
-
+
// Free the resources
bufferization.dealloc_tensor %in3D : tensor<?x?x?xf32>
bufferization.dealloc_tensor %filter3D : tensor<?x?x?xf32>
bufferization.dealloc_tensor %out3D : tensor<?x?x?xf32>
-
+
bufferization.dealloc_tensor %in3D_CDC : tensor<?x?x?xf32, #CDC>
bufferization.dealloc_tensor %filter3D_CDC : tensor<?x?x?xf32, #CDC>
bufferization.dealloc_tensor %in3D_CCC : tensor<?x?x?xf32, #CCC>
%f10 = arith.constant 10.00000e+00 : f32
%val = arith.constant 2.00000e+00 : f32
%zero = arith.constant 0.00000e+00 : f32
-
+
%in3D_tmp = call @alloc_5d_filled_f32(%c1, %c8, %c8, %c8, %c1, %val) : (index, index, index, index, index, f32) -> (tensor<?x?x?x?x?xf32>)
%in3D_ndhwc = tensor.insert %f10 into %in3D_tmp[%c0, %c0, %c0, %c3, %c0] : tensor<?x?x?x?x?xf32>
-
+
%filter3D_ndhwc = call @alloc_5d_filled_f32(%c3, %c3, %c3, %c1, %c1, %val) : (index, index, index, index, index, f32) -> (tensor<?x?x?x?x?xf32>)
%out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (tensor<?x?x?x?x?xf32>)
: tensor<?x?x?x?x?xf32> to tensor<?x?x?x?x?xf32, #CDCDC>
%filter3D_ndhwc_CDCDC = sparse_tensor.convert %filter3D_ndhwc
: tensor<?x?x?x?x?xf32> to tensor<?x?x?x?x?xf32, #CDCDC>
-
+
// CHECK:( ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
// CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) )
%1 = sparse_tensor.convert %CCCCC_ret
- : tensor<?x?x?x?x?xf32, #CCCCC> to tensor<?x?x?x?x?xf32>
+ : tensor<?x?x?x?x?xf32, #CCCCC> to tensor<?x?x?x?x?xf32>
%v1 = vector.transfer_read %1[%c0, %c0, %c0, %c0, %c0], %zero
: tensor<?x?x?x?x?xf32>, vector<1x6x6x6x1xf32>
vector.print %v1 : vector<1x6x6x6x1xf32>
%v2 = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0, %c0], %zero
: tensor<?x?x?x?x?xf32>, vector<1x6x6x6x1xf32>
vector.print %v2 : vector<1x6x6x6x1xf32>
-
+
// Free the resources
bufferization.dealloc_tensor %in3D_ndhwc : tensor<?x?x?x?x?xf32>
bufferization.dealloc_tensor %filter3D_ndhwc : tensor<?x?x?x?x?xf32>
bufferization.dealloc_tensor %out3D_ndhwc : tensor<?x?x?x?x?xf32>
-
+
bufferization.dealloc_tensor %in3D_ndhwc_CDCDC : tensor<?x?x?x?x?xf32, #CDCDC>
bufferization.dealloc_tensor %filter3D_ndhwc_CDCDC : tensor<?x?x?x?x?xf32, #CDCDC>
bufferization.dealloc_tensor %in3D_ndhwc_CCCCC : tensor<?x?x?x?x?xf32, #CCCCC>
func.func @conv2d_sparse_out(%input: tensor<8x8xi32>,
%filter: tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> {
- %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
+ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
%0 = linalg.conv_2d
ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
%1 = call @conv2d_sparse_out(%input, %sparse_filter)
: (tensor<8x8xi32>,
tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
-
+
// Verify the output.
//
// CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
%m = arith.constant dense <[ [ 1.1, 1.2, 1.3, 1.4 ],
[ 2.1, 2.2, 2.3, 2.4 ],
[ 3.1, 3.2, 3.3, 3.4 ]]> : tensor<3x4xf64>
- %n = arith.constant dense <[
+ %n = arith.constant dense <[
[ [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0]],
[[11.0, 12.0], [13.0, 14.0], [15.0, 16.0], [17.0, 18.0], [19.0, 20.0]],
[[21.0, 22.0], [23.0, 24.0], [25.0, 26.0], [27.0, 28.0], [29.0, 30.0]] ],
}
func.func @reduce_add(%arg0: vector<f32>) {
- %0 = vector.reduction <add>, %arg0 : vector<f32> into f32
+ %0 = vector.reduction <add>, %arg0 : vector<f32> into f32
vector.print %0 : f32
// CHECK: 5
return
vector.print %d#0 : vector<3x2xf32>
vector.print %d#1 : vector<3xf32>
- return
+ return
}
ins(%gemm, %arg2 : tensor<?x?xf32>, tensor<?xf32>) outs(%init : tensor<?x?xf32>) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
%add = arith.addf %b0, %b1 : f32
- linalg.yield %add : f32
+ linalg.yield %add : f32
} -> tensor<?x?xf32>
- return %generic : tensor<?x?xf32>
+ return %generic : tensor<?x?xf32>
}
// CHECK: func.func @gemm_generic_fusion(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
iterator_types = ["parallel", "parallel"]}
ins(%gemm : tensor<?x?xf32>) outs(%init1 : tensor<?x?xf32>) {
^bb0(%b0 : f32, %b1 : f32):
- linalg.yield %b0 : f32
+ linalg.yield %b0 : f32
} -> tensor<?x?xf32>
return %transpose : tensor<?x?xf32>
}
%rootOperand = pdl_interp.get_operand 0 of %root
%rootOperands = pdl_interp.get_operands of %root : !pdl.range<value>
%operandRange = pdl_interp.create_range %rootOperand, %rootOperands : !pdl.value, !pdl.range<value>
-
+
%operandType = pdl_interp.get_value_type of %rootOperand : !pdl.type
%operandTypes = pdl_interp.get_value_type of %rootOperands : !pdl.range<type>
%typeRange = pdl_interp.create_range %operandType, %operandTypes : !pdl.type, !pdl.range<type>
%s0 = arith.constant 0 : i32
%p0 = arith.constant 1.0 : f32
-
+
%result:2 = scf.for %iter = %start to %stop step %step iter_args(%si = %s0, %pi = %p0) -> (i32, f32) {
%sn = emitc.call "add"(%si, %iter) : (i32, index) -> i32
%pn = emitc.call "mul"(%pi, %iter) : (f32, index) -> f32
// CHECK: {ptr @kernel_func, !"maxnreg", i32 16}
// -----
-llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = [1,23,32],
+llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = [1,23,32],
nvvm.minctasm = 16, nvvm.maxnreg = 32} {
llvm.return
}
// CHECK: {ptr @kernel_func, !"minctasm", i32 16}
// -----
-// expected-error @below {{'"nvvm.minctasm"' attribute must be integer constant}}
-llvm.func @kernel_func() attributes {nvvm.kernel,
+// expected-error @below {{'"nvvm.minctasm"' attribute must be integer constant}}
+llvm.func @kernel_func() attributes {nvvm.kernel,
nvvm.minctasm = "foo"} {
llvm.return
}
// -----
-// expected-error @below {{'"nvvm.maxnreg"' attribute must be integer constant}}
-llvm.func @kernel_func() attributes {nvvm.kernel,
+// expected-error @below {{'"nvvm.maxnreg"' attribute must be integer constant}}
+llvm.func @kernel_func() attributes {nvvm.kernel,
nvvm.maxnreg = "boo"} {
llvm.return
}
// -----
-// expected-error @below {{'"nvvm.reqntid"' attribute must be integer array with maximum 3 index}}
+// expected-error @below {{'"nvvm.reqntid"' attribute must be integer array with maximum 3 index}}
llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.reqntid = [3,4,5,6]} {
llvm.return
}
// -----
-// expected-error @below {{'"nvvm.maxntid"' attribute must be integer array with maximum 3 index}}
+// expected-error @below {{'"nvvm.maxntid"' attribute must be integer array with maximum 3 index}}
llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = [3,4,5,6]} {
llvm.return
}
}
// CHECK-LABEL: define void @omp_taskgroup(
-// CHECK-SAME: i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]])
+// CHECK-SAME: i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]])
// CHECK: br label %[[entry:[^,]+]]
// CHECK: [[entry]]:
// CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}})
}
// CHECK-LABEL: define void @omp_taskgroup_task(
-// CHECK-SAME: i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]])
+// CHECK-SAME: i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]])
// CHECK: %[[structArg:.+]] = alloca { i32, i32, ptr }, align 8
// CHECK: br label %[[entry:[^,]+]]
// CHECK: [[entry]]: ; preds = %3
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
-
+
module {
llvm.func @printf(!llvm.ptr<i8>, ...) -> i32
llvm.mlir.global internal constant @str0("WG size of kernel = %d X %d\0A\00")
%c1 = llvm.mlir.constant(1 : i32) : i32
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
omp.parallel {
- omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>)
+ omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
%0 = spirv.CL.s_abs %arg0 : i32
spirv.Return
}
-
+
spirv.func @vector_size16(%arg0 : vector<16xf32>) "None" {
// CHECK: {{%.*}} = spirv.CL.fabs {{%.*}} : vector<16xf32>
%0 = spirv.CL.fabs %arg0 : vector<16xf32>
"foo.yield"(%0) : (i32) -> ()
}
return
-}
+}
/// This test checks that CSE removes duplicated read ops that follow each
/// other.
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion -split-input-file | FileCheck %s
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion="fusion-maximal" -split-input-file | FileCheck %s --check-prefix=MAXIMAL
-// Part I of fusion tests in mlir/test/Transforms/loop-fusion.mlir.
+// Part I of fusion tests in mlir/test/Transforms/loop-fusion.mlir.
// Part II of fusion tests in mlir/test/Transforms/loop-fusion-2.mlir
// Part IV of fusion tests in mlir/test/Transforms/loop-fusion-4.mlir
// CHECK: memref.alloc() : memref<10xf32>
// CHECK-NEXT: memref.alloc() : memref<10xf32>
- // CHECK-NEXT: affine.for
+ // CHECK-NEXT: affine.for
// CHECK-NEXT: affine.load
// CHECK-NEXT: affine.load
// CHECK-NEXT: arith.addf
func.func @nested_uses_inside(%lb: index, %ub: index, %step: index) {
%true = arith.constant true
  - // Check that ops that contain nested uses of values not defined outside
  + // Check that ops that contain nested uses of values not defined outside
// remain in the loop.
// CHECK-NEXT: arith.constant
// CHECK-NEXT: scf.for
// -----
-func.func @speculate_static_pack_and_unpack(%source: tensor<128x256xf32>,
+func.func @speculate_static_pack_and_unpack(%source: tensor<128x256xf32>,
%dest: tensor<4x16x32x16xf32>, %lb: index, %ub: index, %step: index) {
// CHECK: tensor.pack
- // CHECK-NEXT: scf.for
+ // CHECK-NEXT: scf.for
scf.for %i = %lb to %ub step %step {
- %packed = tensor.pack %source
- inner_dims_pos = [0, 1]
+ %packed = tensor.pack %source
+ inner_dims_pos = [0, 1]
inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
}
-
+
// CHECK: tensor.unpack
- // CHECK-NEXT: scf.for
+ // CHECK-NEXT: scf.for
scf.for %i = %lb to %ub step %step {
%unpacked = tensor.unpack %dest
- inner_dims_pos = [0, 1]
+ inner_dims_pos = [0, 1]
inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
}
- return
+ return
}
// -----
// CHECK-NEXT: tensor.unpack
scf.for %i = %lb to %ub step %step {
%unpacked = tensor.unpack %dest
- inner_dims_pos = [0, 1]
+ inner_dims_pos = [0, 1]
inner_tiles = [%tile_n, %tile_m] into %source : tensor<?x?x?x?xf32> -> tensor<?x?xf32>
}
def TestProduceParamOrForwardOperandOp
: Op<Transform_Dialect, "test_produce_param_or_forward_operand",
[DeclareOpInterfaceMethods<TransformOpInterface>]> {
- let arguments = (ins
+ let arguments = (ins
Arg<Optional<PDL_Operation>, "", [TransformMappingRead]>:$operand,
OptionalAttr<I64Attr>:$parameter);
- let results = (outs
+ let results = (outs
Res<PDL_Operation, "",
[TransformMappingAlloc, TransformMappingWrite]>:$res);
let assemblyFormat = "(`from` $operand^)? ($parameter^)? attr-dict";
def TestConsumeOperandIfMatchesParamOrFail
: Op<Transform_Dialect, "test_consume_operand_if_matches_param_or_fail",
[DeclareOpInterfaceMethods<TransformOpInterface>]> {
- let arguments = (ins
+ let arguments = (ins
Arg<PDL_Operation, "",
[TransformMappingRead, TransformMappingFree]>:$operand,
I64Attr:$parameter);
def TestPrintRemarkAtOperandOp
: Op<Transform_Dialect, "test_print_remark_at_operand",
[DeclareOpInterfaceMethods<TransformOpInterface>]> {
- let arguments = (ins
+ let arguments = (ins
Arg<TransformTypeInterface, "",
[TransformMappingRead, PayloadIRRead]>:$operand,
StrAttr:$message);
def TestWrongNumberOfResultsOp
: Op<Transform_Dialect, "test_wrong_number_of_results",
- [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+ [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
TransformEachOpTrait, TransformOpInterface]> {
let arguments = (ins PDL_Operation:$target);
let results = (outs PDL_Operation:$a,
let cppNamespace = "::mlir::test";
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::Operation * target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::Operation * target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
def TestWrongNumberOfMultiResultsOp
: Op<Transform_Dialect, "test_wrong_number_of_multi_results",
- [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+ [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
TransformEachOpTrait, TransformOpInterface]> {
let arguments = (ins PDL_Operation:$target);
let results = (outs PDL_Operation:$result);
let cppNamespace = "::mlir::test";
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::Operation * target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::Operation * target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
def TestCorrectNumberOfMultiResultsOp
: Op<Transform_Dialect, "test_correct_number_of_multi_results",
- [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+ [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
TransformEachOpTrait, TransformOpInterface]> {
let arguments = (ins PDL_Operation:$target);
let results = (outs PDL_Operation:$result1,
let cppNamespace = "::mlir::test";
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::Operation * target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::Operation * target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
def TestMixedNullAndNonNullResultsOp
: Op<Transform_Dialect, "test_mixed_null_and_non_null_results",
- [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+ [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
TransformEachOpTrait, TransformOpInterface]> {
let arguments = (ins PDL_Operation:$target);
let results = (outs PDL_Operation:$null,
let cppNamespace = "::mlir::test";
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::Operation * target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::Operation * target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
def TestMixedSuccessAndSilenceableOp
: Op<Transform_Dialect, "test_mixed_sucess_and_silenceable",
- [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
+ [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
TransformEachOpTrait, TransformOpInterface]> {
let arguments = (ins PDL_Operation:$target);
let results = (outs);
let cppNamespace = "::mlir::test";
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
- ::mlir::Operation * target,
- ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
+ ::mlir::Operation * target,
+ ::llvm::SmallVectorImpl<::mlir::Operation *> &results,
::mlir::transform::TransformState &state);
}];
}
// Tanh.
// -------------------------------------------------------------------------- //
-func.func @tanh_f32(%a : f32) {
+func.func @tanh_f32(%a : f32) {
%r = math.tanh %a : f32
vector.print %r : f32
return
}
-func.func @tanh_4xf32(%a : vector<4xf32>) {
+func.func @tanh_4xf32(%a : vector<4xf32>) {
%r = math.tanh %a : vector<4xf32>
vector.print %r : vector<4xf32>
return
}
-func.func @tanh_8xf32(%a : vector<8xf32>) {
+func.func @tanh_8xf32(%a : vector<8xf32>) {
%r = math.tanh %a : vector<8xf32>
vector.print %r : vector<8xf32>
return
}
-func.func @tanh() {
+func.func @tanh() {
// CHECK: 0.848284
%f0 = arith.constant 1.25 : f32
call @tanh_f32(%f0) : (f32) -> ()
// Log.
// -------------------------------------------------------------------------- //
-func.func @log_f32(%a : f32) {
+func.func @log_f32(%a : f32) {
%r = math.log %a : f32
vector.print %r : f32
return
}
-func.func @log_4xf32(%a : vector<4xf32>) {
+func.func @log_4xf32(%a : vector<4xf32>) {
%r = math.log %a : vector<4xf32>
vector.print %r : vector<4xf32>
return
}
-func.func @log_8xf32(%a : vector<8xf32>) {
+func.func @log_8xf32(%a : vector<8xf32>) {
%r = math.log %a : vector<8xf32>
vector.print %r : vector<8xf32>
return
}
-func.func @log() {
+func.func @log() {
// CHECK: 2.64704
%f1 = arith.constant 14.112233 : f32
call @log_f32(%f1) : (f32) -> ()
return
}
-func.func @log2_f32(%a : f32) {
+func.func @log2_f32(%a : f32) {
%r = math.log2 %a : f32
vector.print %r : f32
return
}
-func.func @log2_4xf32(%a : vector<4xf32>) {
+func.func @log2_4xf32(%a : vector<4xf32>) {
%r = math.log2 %a : vector<4xf32>
vector.print %r : vector<4xf32>
return
}
-func.func @log2_8xf32(%a : vector<8xf32>) {
+func.func @log2_8xf32(%a : vector<8xf32>) {
%r = math.log2 %a : vector<8xf32>
vector.print %r : vector<8xf32>
return
return
}
-func.func @log1p_f32(%a : f32) {
+func.func @log1p_f32(%a : f32) {
%r = math.log1p %a : f32
vector.print %r : f32
return
}
-func.func @log1p_4xf32(%a : vector<4xf32>) {
+func.func @log1p_4xf32(%a : vector<4xf32>) {
%r = math.log1p %a : vector<4xf32>
vector.print %r : vector<4xf32>
return
}
-func.func @log1p_8xf32(%a : vector<8xf32>) {
+func.func @log1p_8xf32(%a : vector<8xf32>) {
%r = math.log1p %a : vector<8xf32>
vector.print %r : vector<8xf32>
return
// -------------------------------------------------------------------------- //
// Erf.
// -------------------------------------------------------------------------- //
-func.func @erf_f32(%a : f32) {
+func.func @erf_f32(%a : f32) {
%r = math.erf %a : f32
vector.print %r : f32
return
}
-func.func @erf_4xf32(%a : vector<4xf32>) {
+func.func @erf_4xf32(%a : vector<4xf32>) {
%r = math.erf %a : vector<4xf32>
vector.print %r : vector<4xf32>
return
// -------------------------------------------------------------------------- //
// Exp.
// -------------------------------------------------------------------------- //
-func.func @exp_f32(%a : f32) {
+func.func @exp_f32(%a : f32) {
%r = math.exp %a : f32
vector.print %r : f32
return
}
-func.func @exp_4xf32(%a : vector<4xf32>) {
+func.func @exp_4xf32(%a : vector<4xf32>) {
%r = math.exp %a : vector<4xf32>
vector.print %r : vector<4xf32>
return
%zero = arith.constant 0.0 : f32
call @exp_f32(%zero) : (f32) -> ()
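  // Note on the special values checked below: exp(-89) ~= 2.2e-39 underflows
  // past the smallest normal f32 (1.17549e-38), which is what gets printed,
  // while exp(89) ~= 4.5e+38 exceeds the largest finite f32 (~3.4e+38), hence inf.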
- // CHECK: 1.17549e-38, 1.38879e-11, 7.20049e+10, inf
+ // CHECK: 1.17549e-38, 1.38879e-11, 7.20049e+10, inf
%special_vec = arith.constant dense<[-89.0, -25.0, 25.0, 89.0]> : vector<4xf32>
call @exp_4xf32(%special_vec) : (vector<4xf32>) -> ()
return
}
-func.func @expm1_f32(%a : f32) {
+func.func @expm1_f32(%a : f32) {
%r = math.expm1 %a : f32
vector.print %r : f32
return
}
-func.func @expm1_3xf32(%a : vector<3xf32>) {
+func.func @expm1_3xf32(%a : vector<3xf32>) {
%r = math.expm1 %a : vector<3xf32>
vector.print %r : vector<3xf32>
return
}
-func.func @expm1_4xf32(%a : vector<4xf32>) {
+func.func @expm1_4xf32(%a : vector<4xf32>) {
%r = math.expm1 %a : vector<4xf32>
vector.print %r : vector<4xf32>
return
}
-func.func @expm1_8xf32(%a : vector<8xf32>) {
+func.func @expm1_8xf32(%a : vector<8xf32>) {
%r = math.expm1 %a : vector<8xf32>
vector.print %r : vector<8xf32>
return
// -------------------------------------------------------------------------- //
// Sin.
// -------------------------------------------------------------------------- //
-func.func @sin_f32(%a : f32) {
+func.func @sin_f32(%a : f32) {
%r = math.sin %a : f32
vector.print %r : f32
return
}
-func.func @sin_3xf32(%a : vector<3xf32>) {
+func.func @sin_3xf32(%a : vector<3xf32>) {
%r = math.sin %a : vector<3xf32>
vector.print %r : vector<3xf32>
return
// -------------------------------------------------------------------------- //
// cos.
// -------------------------------------------------------------------------- //
-func.func @cos_f32(%a : f32) {
+func.func @cos_f32(%a : f32) {
%r = math.cos %a : f32
vector.print %r : f32
return
}
-func.func @cos_3xf32(%a : vector<3xf32>) {
+func.func @cos_3xf32(%a : vector<3xf32>) {
%r = math.cos %a : vector<3xf32>
vector.print %r : vector<3xf32>
return
// -------------------------------------------------------------------------- //
// Atan.
// -------------------------------------------------------------------------- //
-func.func @atan_f32(%a : f32) {
+func.func @atan_f32(%a : f32) {
%r = math.atan %a : f32
vector.print %r : f32
return
// -------------------------------------------------------------------------- //
// Atan2.
// -------------------------------------------------------------------------- //
-func.func @atan2_f32(%a : f32, %b : f32) {
+func.func @atan2_f32(%a : f32, %b : f32) {
%r = math.atan2 %a, %b : f32
vector.print %r : f32
return
// DEF-NEXT: .Case(::test::IndexType::getMnemonic()
// DEF-NEXT: value = ::test::IndexType::parse(parser);
// DEF-NEXT: return ::mlir::success(!!value);
-// DEF: .Default([&](llvm::StringRef keyword,
+// DEF: .Default([&](llvm::StringRef keyword,
// DEF-NEXT: *mnemonic = keyword;
-// DEF-NEXT: return llvm::None;
+// DEF-NEXT: return llvm::None;
def Test_Dialect: Dialect {
// DECL-NOT: TestDialect