[Matrix] Add remark propagation along the inlined-at chain.

author Florian Hahn <flo@fhahn.com>

Wed, 11 Mar 2020 17:01:47 +0000 (17:01 +0000)

committer Florian Hahn <flo@fhahn.com>

Wed, 11 Mar 2020 17:40:08 +0000 (17:40 +0000)
author Florian Hahn <flo@fhahn.com>
Wed, 11 Mar 2020 17:01:47 +0000 (17:01 +0000)
committer Florian Hahn <flo@fhahn.com>
Wed, 11 Mar 2020 17:40:08 +0000 (17:40 +0000)
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp

index 87b3ef8..5efd3ff 100644 (file)
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -10,8 +10,6 @@
  //
  // TODO:
  //  * Implement multiply & add fusion
-//  * Add remark, summarizing the available matrix optimization opportunities
-//    (WIP).
  //
  //===----------------------------------------------------------------------===//
  
@@ -25,6 +23,7 @@
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/CFG.h"
  #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"
@@ -50,6 +49,14 @@ static cl::opt<bool> AllowContractEnabled(
      cl::desc("Allow the use of FMAs if available and profitable. This may "
               "result in different results, due to less rounding error."));
  
+/// Returns \p Scope itself if it is a DISubprogram. Otherwise \p Scope is
+/// cast<> to DILocalScope and that scope's getSubprogram() is returned.
+static DISubprogram *getSubprogram(DIScope *Scope) {
+  if (auto *Subprogram = dyn_cast<DISubprogram>(Scope))
+    return Subprogram;
+  return cast<DILocalScope>(Scope)->getSubprogram();
+}
+
  namespace {
  
  // Given an element poitner \p BasePtr to the start of a (sub) matrix, compute
@@ -574,7 +581,7 @@ public:
        }
      }
  
-    RemarkGenerator RemarkGen(Inst2ColumnMatrix, ORE, DL);
+    RemarkGenerator RemarkGen(Inst2ColumnMatrix, ORE, Func);
      RemarkGen.emitRemarks();
  
      for (Instruction *Inst : reverse(ToRemove))
@@ -950,6 +957,9 @@ public:
      /// part of.
      const DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared;
  
+    /// Set of matrix expressions in the scope of a given DISubprogram.
+    const SmallSetVector<Value *, 32> &ExprsInSubprogram;
+
      /// Leaf node of the expression to linearize.
      Value *Leaf;
  
@@ -960,9 +970,10 @@ public:
      ExprLinearizer(const DataLayout &DL,
                     const MapVector<Value *, ColumnMatrixTy> &Inst2ColumnMatrix,
                     const DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared,
+                   const SmallSetVector<Value *, 32> &ExprsInSubprogram,
                     Value *Leaf)
          : Str(), Stream(Str), DL(DL), Inst2ColumnMatrix(Inst2ColumnMatrix),
-          Shared(Shared), Leaf(Leaf) {}
+          Shared(Shared), ExprsInSubprogram(ExprsInSubprogram), Leaf(Leaf) {}
  
      void indent(unsigned N) {
        LineLength += N;
@@ -996,10 +1007,8 @@ public:
        return V;
      }
  
-    /// Returns true if \p V is a matrix value.
-    bool isMatrix(Value *V) const {
-      return Inst2ColumnMatrix.find(V) != Inst2ColumnMatrix.end();
-    }
+    /// Returns true if \p V is a matrix value in the given subprogram.
+    bool isMatrix(Value *V) const { return ExprsInSubprogram.count(V); }
  
      /// If \p V is a matrix value, print its shape as as NumRows x NumColumns to
      /// \p SS.
@@ -1191,60 +1200,69 @@ public:
  
    /// Generate remarks for matrix operations in a function. To generate remarks
    /// for matrix expressions, the following approach is used:
-  /// 1. Collect leafs of matrix expressions (done in
-  ///    RemarkGenerator::getExpressionLeaves).  Leaves are lowered matrix
-  ///    instructions without other matrix users (like stores).
-  ///
-  /// 2. For each leaf, create a remark containing a linearizied version of the
-  ///    matrix expression.
-  ///
-  /// TODO:
-  ///  * Summarize number of vector instructions generated for each expression.
-  ///  * Propagate matrix remarks up the inlining chain.
+  /// 1. Use the inlined-at debug information to group matrix operations to the
+  ///    DISubprograms they are contained in.
+  /// 2. Collect leaves of matrix expressions (done in
+  ///    RemarkGenerator::getExpressionLeaves) for each subprogram - expression
+  ///    mapping.  Leaves are lowered matrix instructions without other matrix
+  ///    users (like stores) in the current subprogram.
+  /// 3. For each leaf, create a remark containing a linearized version of the
+  ///    matrix expression. The expression is linearized by a recursive
+  ///    bottom-up traversal of the matrix operands, starting at a leaf. Note
+  ///    that multiple leaves can share sub-expressions. Shared subexpressions
+  ///    are explicitly marked as shared().
    struct RemarkGenerator {
      const MapVector<Value *, ColumnMatrixTy> &Inst2ColumnMatrix;
      OptimizationRemarkEmitter &ORE;
+    Function &Func;
      const DataLayout &DL;
  
      RemarkGenerator(const MapVector<Value *, ColumnMatrixTy> &Inst2ColumnMatrix,
-                    OptimizationRemarkEmitter &ORE, const DataLayout &DL)
-        : Inst2ColumnMatrix(Inst2ColumnMatrix), ORE(ORE), DL(DL) {}
-
-    /// Return all leafs of matrix expressions. Those are instructions in
-    /// Inst2ColumnMatrix returing void. Currently that should only include
-    /// stores.
-    SmallVector<Value *, 4> getExpressionLeaves() {
+                    OptimizationRemarkEmitter &ORE, Function &Func)
+        : Inst2ColumnMatrix(Inst2ColumnMatrix), ORE(ORE), Func(Func),
+          DL(Func.getParent()->getDataLayout()) {}
+
+    /// Return all leaves of the expressions in \p ExprsInSubprogram. Those are
+    /// expressions returning void (e.g. stores) or without any users in
+    /// \p ExprsInSubprogram (e.g. a load only used in other subprograms).
+    SmallVector<Value *, 4>
+    getExpressionLeaves(const SmallSetVector<Value *, 32> &ExprsInSubprogram) {
        SmallVector<Value *, 4> Leaves;
-      for (auto &KV : Inst2ColumnMatrix)
-        if (KV.first->getType()->isVoidTy())
-          Leaves.push_back(KV.first);
-
+      for (auto *Expr : ExprsInSubprogram)
+        if (Expr->getType()->isVoidTy() ||
+            !any_of(Expr->users(), [&ExprsInSubprogram](User *U) {
+              return ExprsInSubprogram.count(U);
+            }))
+          Leaves.push_back(Expr);
        return Leaves;
      }
  
      /// Recursively traverse expression \p V starting at \p Leaf and add \p Leaf
-    /// to all visited expressions in \p Shared.
+    /// to all visited expressions in \p Shared. Limit the matrix operations to
+    /// the ones in \p ExprsInSubprogram.
      void collectSharedInfo(Value *Leaf, Value *V,
+                           const SmallSetVector<Value *, 32> &ExprsInSubprogram,
                             DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared) {
  
-      if (Inst2ColumnMatrix.find(V) == Inst2ColumnMatrix.end())
+      if (!ExprsInSubprogram.count(V))
          return;
  
        auto I = Shared.insert({V, {}});
        I.first->second.insert(Leaf);
  
        for (Value *Op : cast<Instruction>(V)->operand_values())
-        collectSharedInfo(Leaf, Op, Shared);
+        collectSharedInfo(Leaf, Op, ExprsInSubprogram, Shared);
        return;
      }
  
      /// Calculate the number of exclusive and shared op counts for expression
      /// starting at \p V. Expressions used multiple times are counted once.
+    /// Limit the matrix operations to the ones in \p ExprsInSubprogram.
      std::pair<OpInfoTy, OpInfoTy>
      sumOpInfos(Value *Root, SmallPtrSetImpl<Value *> &ReusedExprs,
-               DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared) {
-      auto CM = Inst2ColumnMatrix.find(Root);
-      if (CM == Inst2ColumnMatrix.end())
+               const SmallSetVector<Value *, 32> &ExprsInSubprogram,
+               DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared) const {
+      if (!ExprsInSubprogram.count(Root))
          return {};
  
        // Already counted this expression. Stop.
@@ -1255,13 +1273,14 @@ public:
        OpInfoTy Count;
  
        auto I = Shared.find(Root);
+      auto CM = Inst2ColumnMatrix.find(Root);
        if (I->second.size() == 1)
          Count = CM->second.getOpInfo();
        else
          SharedCount = CM->second.getOpInfo();
  
        for (Value *Op : cast<Instruction>(Root)->operand_values()) {
-        auto C = sumOpInfos(Op, ReusedExprs, Shared);
+        auto C = sumOpInfos(Op, ReusedExprs, ExprsInSubprogram, Shared);
          Count += C.first;
          SharedCount += C.second;
        }
@@ -1272,49 +1291,83 @@ public:
        if (!ORE.allowExtraAnalysis(DEBUG_TYPE))
          return;
  
-      // Find leafs of matrix expressions.
-      auto Leaves = getExpressionLeaves();
-
-      DenseMap<Value *, SmallPtrSet<Value *, 2>> Shared;
-
-      for (Value *Leaf : Leaves)
-        collectSharedInfo(Leaf, Leaf, Shared);
-
-      // Generate remarks for each leaf.
-      for (auto *L : Leaves) {
-        SmallPtrSet<Value *, 8> ReusedExprs;
-        OpInfoTy Counts, SharedCounts;
-        std::tie(Counts, SharedCounts) = sumOpInfos(L, ReusedExprs, Shared);
-
-        OptimizationRemark Rem(DEBUG_TYPE, "matrix-lowered",
-                               cast<Instruction>(L)->getDebugLoc(),
-                               cast<Instruction>(L)->getParent());
-
-        Rem << "Lowered with ";
-        Rem << ore::NV("NumStores", Counts.NumStores) << " stores, "
-            << ore::NV("NumLoads", Counts.NumLoads) << " loads, "
-            << ore::NV("NumComputeOps", Counts.NumComputeOps) << " compute ops";
-
-        if (SharedCounts.NumStores > 0 || SharedCounts.NumLoads > 0 ||
-            SharedCounts.NumComputeOps > 0) {
-          Rem << ",\nadditionally "
-              << ore::NV("NumStores", SharedCounts.NumStores) << " stores, "
-              << ore::NV("NumLoads", SharedCounts.NumLoads) << " loads, "
-              << ore::NV("NumFPOps", SharedCounts.NumComputeOps)
-              << " compute ops"
-              << " are shared with other expressions";
+      // Map matrix operations to their containing subprograms, by traversing
+      // the inlinedAt chain. If the function does not have a DISubprogram, we
+      // only map them to the containing function.
+      MapVector<DISubprogram *, SmallVector<Value *, 8>> Subprog2Exprs;
+      for (auto &KV : Inst2ColumnMatrix) {
+        if (Func.getSubprogram()) {
+          auto *I = cast<Instruction>(KV.first);
+          DILocation *Context = I->getDebugLoc();
+          while (Context) {
+            auto I =
+                Subprog2Exprs.insert({getSubprogram(Context->getScope()), {}});
+            I.first->second.push_back(KV.first);
+            Context = DebugLoc(Context).getInlinedAt();
+          }
+        } else {
+          auto I = Subprog2Exprs.insert({nullptr, {}});
+          I.first->second.push_back(KV.first);
          }
+      }
+      for (auto &KV : Subprog2Exprs) {
+        SmallSetVector<Value *, 32> ExprsInSubprogram(KV.second.begin(),
+                                                      KV.second.end());
+        auto Leaves = getExpressionLeaves(ExprsInSubprogram);
+
+        DenseMap<Value *, SmallPtrSet<Value *, 2>> Shared;
+        for (Value *Leaf : Leaves)
+          collectSharedInfo(Leaf, Leaf, ExprsInSubprogram, Shared);
+
+        // Generate remarks for each leaf.
+        for (auto *L : Leaves) {
+
+          DebugLoc Loc = cast<Instruction>(L)->getDebugLoc();
+          DILocation *Context = cast<Instruction>(L)->getDebugLoc();
+          while (Context) {
+            if (getSubprogram(Context->getScope()) == KV.first) {
+              Loc = Context;
+              break;
+            }
+            Context = DebugLoc(Context).getInlinedAt();
+          }
+
+          SmallPtrSet<Value *, 8> ReusedExprs;
+          OpInfoTy Counts, SharedCounts;
+          std::tie(Counts, SharedCounts) =
+              sumOpInfos(L, ReusedExprs, ExprsInSubprogram, Shared);
+
+          OptimizationRemark Rem(DEBUG_TYPE, "matrix-lowered", Loc,
+                                 cast<Instruction>(L)->getParent());
+
+          Rem << "Lowered with ";
+          Rem << ore::NV("NumStores", Counts.NumStores) << " stores, "
+              << ore::NV("NumLoads", Counts.NumLoads) << " loads, "
+              << ore::NV("NumComputeOps", Counts.NumComputeOps)
+              << " compute ops";
+
+          if (SharedCounts.NumStores > 0 || SharedCounts.NumLoads > 0 ||
+              SharedCounts.NumComputeOps > 0) {
+            Rem << ",\nadditionally "
+                << ore::NV("NumStores", SharedCounts.NumStores) << " stores, "
+                << ore::NV("NumLoads", SharedCounts.NumLoads) << " loads, "
+                << ore::NV("NumFPOps", SharedCounts.NumComputeOps)
+                << " compute ops"
+                << " are shared with other expressions";
+          }
  
-        Rem << ("\n" + linearize(L, Shared, DL));
-        ORE.emit(Rem);
+          Rem << ("\n" + linearize(L, Shared, ExprsInSubprogram, DL));
+          ORE.emit(Rem);
+        }
        }
      }
  
      std::string
      linearize(Value *L,
                const DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared,
+              const SmallSetVector<Value *, 32> &ExprsInSubprogram,
                const DataLayout &DL) {
-      ExprLinearizer Lin(DL, Inst2ColumnMatrix, Shared, L);
+      ExprLinearizer Lin(DL, Inst2ColumnMatrix, Shared, ExprsInSubprogram, L);
        Lin.linearizeExpr(L, 0, false, false);
        return Lin.getResult();
      }
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/remarks-inlining.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/remarks-inlining.ll

new file mode 100644 (file)

index 0000000..f0a5212
--- /dev/null
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/remarks-inlining.ll
@@ -0,0 +1,166 @@
+; REQUIRES: aarch64-registered-target
+
+; This test needs to be target specific due to the cost estimate in the output.
+
+; RUN: opt -lower-matrix-intrinsics -pass-remarks=lower-matrix-intrinsics -mtriple=arm64-apple-iphoneos -S < %s 2>&1 | FileCheck  %s
+
+; Test the propagation of matrix expressions along the inlined-at chain. The IR
+; in the test roughly corresponds to the C++ code below, with the IR containing
+; references to a few more functions.
+
+; matrix.h
+; template <typename Ty, unsigned R, unsigned C>
+; struct Matrix {
+;   using matrix_t = Ty __attribute__((matrix_type(R, C)));
+;
+;   matrix_t value;
+; };
+;
+; ; add.h
+; template <typename Ty, unsigned R, unsigned C>
+; Matrix<Ty, R, C> add(Matrix<Ty, R, C> M1, Matrix<Ty, R, C> M2) {
+;   Matrix<Ty, R, C> Result;
+;   Result.value = __builtin_matrix_add(M1.value, M2.value);
+;   return Result;
+; }
+;
+; load.h:
+; template <typename Ty, unsigned R, unsigned C>
+; Matrix<Ty, R, C> load(Ty *Ptr) {
+;   Matrix<Ty, R, C> Result;
+;   Result.value = *reinterpret_cast <typename Matrix<Ty, R, C>::matrix_t *>(Ptr);
+;   return Result;
+; }
+;
+; store.h:
+; template <typename Ty, unsigned R, unsigned C>
+; void store(Matrix<Ty, R, C> M1, Ty *Ptr) {
+;   *reinterpret_cast<typename decltype(M1)::matrix_t *>(Ptr) = M1.value;
+; }
+;
+; toplevel.cpp
+; void test(double *A, double *B, double *C) {
+;   store(add(load<double, 3, 5>(A), load<double, 3, 5>(B)), C);
+; }
+;
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "aarch64-apple-ios"
+
+; CHECK-LABEL: remark: load.h:41:43: Lowered with 0 stores, 10 loads, 0 compute ops
+; CHECK-NEXT:  load(addr %A)
+
+; CHECK-LABEL: remark: load.h:41:43: Lowered with 0 stores, 10 loads, 0 compute ops
+; CHECK-NEXT:  columnwise.load.3x5.double(addr %B, 5)
+
+; CHECK-LABEL: remark: load.h:41:11: Lowered with 0 stores, 1 loads, 0 compute ops
+; CHECK-NEXT: load(addr %D)
+
+; CHECK-LABEL: remark: assign.h:32:43: Lowered with 0 stores, 10 loads, 0 compute ops
+; CHECK-NEXT:  load(addr %A)
+
+; CHECK-LABEL: remark: assign.h:32:43: Lowered with 0 stores, 10 loads, 0 compute ops
+; CHECK-NEXT:  columnwise.load.3x5.double(addr %B, 5)
+
+; CHECK-LABEL: remark: toplevel.c:410:0: Lowered with 10 stores, 20 loads, 10 compute ops
+; CHECK-NEXT:  store(
+; CHECK-NEXT:   fadd(
+; CHECK-NEXT:    load(addr %A),
+; CHECK-NEXT:    columnwise.load.3x5.double(addr %B, 5)),
+; CHECK-NEXT:   addr %C)
+
+; CHECK-LABEL: remark: toplevel.c:510:0: Lowered with 1 stores, 1 loads, 8 compute ops
+; CHECK-NEXT:  store(
+; CHECK-NEXT:   transpose.1x2.float(transpose.2x1.float(load(addr %D))),
+; CHECK-NEXT:   addr %D)
+
+; CHECK-LABEL: remark: add.h:66:11: Lowered with 0 stores, 0 loads, 10 compute ops
+; CHECK-NEXT:  fadd(
+; CHECK-NEXT:   addr %A,
+; CHECK-NEXT:   scalar)
+
+; CHECK-LABEL: remark: store.h:10:11: Lowered with 10 stores, 0 loads, 0 compute ops
+; CHECK-NEXT:  store(
+; CHECK-NEXT:   scalar,
+; CHECK-NEXT:   addr %C)
+
+; CHECK-LABEL: remark: store.h:66:11: Lowered with 1 stores, 0 loads, 0 compute ops
+; CHECK-NEXT:  store(
+; CHECK-NEXT:  scalar,
+; CHECK-NEXT:  addr %D)
+
+; CHECK-LABEL: remark: transpose.h:13:11: Lowered with 0 stores, 0 loads, 8 compute ops
+; CHECK-NEXT:  transpose.1x2.float(transpose.2x1.float(addr %D))
+
+define void @toplevel(<15 x double>* %A, <15 x double>* %B, <15 x double>* %C, <2 x float>* %D) !dbg !16 {
+entry:
+  %a = load <15 x double>, <15 x double> *%A, align 16, !dbg !3791
+  %b = call <15 x double> @llvm.matrix.columnwise.load(<15 x double>* %B, i32 5, i32 3, i32 5), !dbg !3793
+  %c  = fadd <15 x double> %a, %b, !dbg !100
+  store <15 x double> %c, <15 x double> *%C, align 16, !dbg !102
+
+  %load = load <2 x float>, <2 x float>* %D, !dbg !104
+  %t1 = call <2 x float> @llvm.matrix.transpose(<2 x float> %load, i32 2, i32 1), !dbg !106
+  %t2 = call <2 x float> @llvm.matrix.transpose(<2 x float> %t1, i32 1, i32 2), !dbg !106
+  store <2 x float> %t2, <2 x float>* %D, !dbg !108
+  ret void
+}
+
+declare <15 x double> @llvm.matrix.columnwise.load(<15 x double>*, i32, i32, i32)
+declare <2 x float> @llvm.matrix.transpose(<2 x float>, i32, i32)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "load.h", directory: "/test")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "load_fn", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+!17 = !DIFile(filename: "toplevel.c", directory: "/test")
+!16 = distinct !DISubprogram(name: "toplevel", scope: !1, file: !17, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+!18 = !DIFile(filename: "assign.h", directory: "/test")
+!19 = distinct !DISubprogram(name: "assign", scope: !1, file: !18, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+
+!20 = !DIFile(filename: "add.h", directory: "/test")
+!21 = distinct !DISubprogram(name: "add_fn", scope: !1, file: !20, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+
+!22 = !DIFile(filename: "store.h", directory: "/test")
+!23 = distinct !DISubprogram(name: "store_fn", scope: !1, file: !22, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+
+!24 = !DIFile(filename: "transpose.h", directory: "/test")
+!25 = distinct !DISubprogram(name: "transpose", scope: !1, file: !24, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+
+
+!6 = !DISubroutineType(types: !7)
+!7 = !{null, !8, !8, !11}
+!8 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !9)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 32, align: 32)
+!10 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+!11 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!12 = !{!13}
+!13 = !DILocalVariable(name: "a", arg: 1, scope: !5, file: !1, line: 1, type: !8)
+!14 = !DILocation(line: 1, column: 27, scope: !5)
+
+!3791 = !DILocation(line: 41, column: 43, scope: !5, inlinedAt: !3795)
+!3792 = !DILocation(line: 405, column: 3, scope: !16)
+!3793 = !DILocation(line: 41, column: 43, scope: !5, inlinedAt: !3796)
+!3794 = !DILocation(line: 406, column: 11, scope: !16)
+!3795 = !DILocation(line: 32, column: 43, scope: !19, inlinedAt: !3792)
+!3796 = !DILocation(line: 32, column: 43, scope: !19, inlinedAt: !3794)
+
+!100 = !DILocation(line: 66, column: 11, scope: !21, inlinedAt: !101)
+!101 = !DILocation(line: 410, column: 11, scope: !16)
+
+!102 = !DILocation(line: 10, column: 11, scope: !23, inlinedAt: !103)
+!103 = !DILocation(line: 410, column: 0, scope: !16)
+
+!104 = !DILocation(line: 41, column: 11, scope: !5, inlinedAt: !101)
+!105 = !DILocation(line: 500, column: 11, scope: !16)
+
+!106 = !DILocation(line: 13, column: 11, scope: !25, inlinedAt: !101)
+!107 = !DILocation(line: 510, column: 11, scope: !16)
+
+!108 = !DILocation(line: 66, column: 11, scope: !23, inlinedAt: !109)
+!109 = !DILocation(line: 510, column: 0, scope: !16)
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/remarks.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/remarks.ll

index 0bca099..c83648e 100644 (file)
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/remarks.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/remarks.ll
@@ -71,8 +71,8 @@ declare void @llvm.matrix.columnwise.store(<9 x double>, <9 x double>*, i32, i32
  
  define void @binaryops(<9 x double>* %A, <9 x double>* %B) !dbg !31 {
    %A.matrix = call <9 x double> @llvm.matrix.columnwise.load(<9 x double>* %A, i32 5, i32 3, i32 3), !dbg !32
-  %R1.matrix = fadd <9 x double> %A.matrix, %A.matrix
-  %R2.matrix = fmul <9 x double> %R1.matrix, %A.matrix
+  %R1.matrix = fadd <9 x double> %A.matrix, %A.matrix, !dbg !32
+  %R2.matrix = fmul <9 x double> %R1.matrix, %A.matrix, !dbg !32
    call void @llvm.matrix.columnwise.store(<9 x double> %R2.matrix, <9 x double>* %B, i32 10, i32 3, i32 3), !dbg !32
    ret void
  }
@@ -95,8 +95,8 @@ define void @binaryops(<9 x double>* %A, <9 x double>* %B) !dbg !31 {
  
  define void @multiple_expressions(<9 x double>* %A, <9 x double>* %B, <12 x double>* %C, <12 x double>* %D, <4 x double>* %E) !dbg !33 {
    %A.matrix = call <9 x double> @llvm.matrix.columnwise.load(<9 x double>* %A, i32 5, i32 3, i32 3), !dbg !34
-  %R1.matrix = fadd <9 x double> %A.matrix, %A.matrix
-  %R2.matrix = fmul <9 x double> %R1.matrix, %A.matrix
+  %R1.matrix = fadd <9 x double> %A.matrix, %A.matrix, !dbg !34
+  %R2.matrix = fmul <9 x double> %R1.matrix, %A.matrix, !dbg !34
    call void @llvm.matrix.columnwise.store(<9 x double> %R2.matrix, <9 x double>* %B, i32 10, i32 3, i32 3), !dbg !34
  
    %C.matrix = load <12 x double>, <12 x double>* %C, !dbg !34
@@ -119,8 +119,8 @@ define void @multiple_expressions(<9 x double>* %A, <9 x double>* %B, <12 x doub
  define void @stackaddresses(<9 x double>* %A) !dbg !35 {
    %B = alloca <9 x double>
    %A.matrix = call <9 x double> @llvm.matrix.columnwise.load(<9 x double>* %A, i32 5, i32 3, i32 3), !dbg !36
-  %R1.matrix = fadd <9 x double> %A.matrix, %A.matrix
-  %R2.matrix = fmul <9 x double> %R1.matrix, %A.matrix
+  %R1.matrix = fadd <9 x double> %A.matrix, %A.matrix, !dbg !36
+  %R2.matrix = fmul <9 x double> %R1.matrix, %A.matrix, !dbg !36
    call void @llvm.matrix.columnwise.store(<9 x double> %R2.matrix, <9 x double>* %B, i32 10, i32 3, i32 3), !dbg !36
    ret void
  }
@@ -140,7 +140,7 @@ entry:
    %s2 = bitcast <15 x double>* %s1 to i64*, !dbg !22
    %s3 = bitcast i64* %s2 to <15 x double>*, !dbg !22
  
-  %t = call <15 x double> @llvm.matrix.transpose.v15f64.v15f64(<15 x double> %av, i32 5, i32 3)
+  %t = call <15 x double> @llvm.matrix.transpose.v15f64.v15f64(<15 x double> %av, i32 5, i32 3), !dbg !22
  
    store <15 x double> %t, <15 x double>* %s3, !dbg !22
    ret void
author	Florian Hahn <flo@fhahn.com>
	Wed, 11 Mar 2020 17:01:47 +0000 (17:01 +0000)
committer	Florian Hahn <flo@fhahn.com>
	Wed, 11 Mar 2020 17:40:08 +0000 (17:40 +0000)
llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp		patch \| blob \| history
llvm/test/Transforms/LowerMatrixIntrinsics/remarks-inlining.ll	[new file with mode: 0644]	patch \| blob
llvm/test/Transforms/LowerMatrixIntrinsics/remarks.ll		patch \| blob \| history