Add lowering of linalg.copy to an external C++ library and a test.
author Nicolas Vasilache <ntv@google.com>
Fri, 23 Aug 2019 18:08:59 +0000 (11:08 -0700)
committer A. Unique TensorFlower <gardener@tensorflow.org>
Fri, 23 Aug 2019 18:09:53 +0000 (11:09 -0700)
This CL extends the lowering of linalg to external C++ library calls with support for CopyOp. Currently this only works when the permutation maps of the copy are identity; support for non-identity permutations will be added later.

PiperOrigin-RevId: 265093025

mlir/lib/Dialect/Linalg/Transforms/LowerToLLVMDialect.cpp
mlir/test/Linalg/llvm.mlir
mlir/test/mlir-cpu-runner/cblas_interface.cpp
mlir/test/mlir-cpu-runner/linalg_integration_test.mlir

index b6e0430..e4ce0ca 100644 (file)
@@ -670,11 +670,23 @@ public:
   PatternMatchResult
   matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
                   ConversionPatternRewriter &rewriter) const override {
-    // Only emit library call declaration. Fill in the body later.
     auto f = getLLVMLibraryCallDeclaration<LinalgOp>(op, lowering, rewriter);
     if (!f)
       return matchFailure();
 
+    if (std::is_same<LinalgOp, CopyOp>::value) {
+      auto copyOp = cast<CopyOp>(op);
+
+      // Ensure permutations are identity.
+      // TODO(ntv): insert a transpose op that captures the permutations and
+      // remove this.
+      auto inputPerm = copyOp.inputPermutation();
+      if (inputPerm.hasValue() && !inputPerm->isIdentity())
+        return matchFailure();
+      auto outputPerm = copyOp.outputPermutation();
+      if (outputPerm.hasValue() && !outputPerm->isIdentity())
+        return matchFailure();
+    }
     auto fAttr = rewriter.getSymbolRefAttr(f);
     auto named = rewriter.getNamedAttr("callee", fAttr);
     rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, operands,
@@ -688,13 +700,12 @@ static void
 populateLinalgToLLVMConversionPatterns(LinalgTypeConverter &converter,
                                        OwningRewritePatternList &patterns,
                                        MLIRContext *ctx) {
-  patterns
-      .insert<BufferAllocOpConversion, BufferDeallocOpConversion,
-              BufferSizeOpConversion, DimOpConversion,
-              LinalgOpConversion<DotOp>, LinalgOpConversion<FillOp>,
-              LinalgOpConversion<MatmulOp>, LoadOpConversion, RangeOpConversion,
-              SliceOpConversion, StoreOpConversion, ViewOpConversion>(
-          ctx, converter);
+  patterns.insert<BufferAllocOpConversion, BufferDeallocOpConversion,
+                  BufferSizeOpConversion, DimOpConversion,
+                  LinalgOpConversion<CopyOp>, LinalgOpConversion<DotOp>,
+                  LinalgOpConversion<FillOp>, LinalgOpConversion<MatmulOp>,
+                  LoadOpConversion, RangeOpConversion, SliceOpConversion,
+                  StoreOpConversion, ViewOpConversion>(ctx, converter);
 }
 
 namespace {
index ea3d9d0..5246103 100644 (file)
@@ -191,3 +191,10 @@ func @view_with_range_and_index(%arg0: !linalg.view<?x?xf64>) {
 //       CHECK:   llvm.extractvalue %{{.*}}[1] : !llvm<"{ i64, i64, i64 }">
 //       CHECK:   llvm.insertvalue %{{.*}}[2, 0] : !llvm<"{ double*, i64, [1 x i64], [1 x i64] }">
 //       CHECK:   llvm.insertvalue %{{.*}}[3, 0] : !llvm<"{ double*, i64, [1 x i64], [1 x i64] }">
+
+func @copy(%arg0: !linalg.view<?x?x?xf32>, %arg1: !linalg.view<?x?x?xf32>) { // Lowering test input: a single linalg.copy between two rank-3 f32 views.
+  linalg.copy(%arg0, %arg1) : !linalg.view<?x?x?xf32>, !linalg.view<?x?x?xf32> // no permutation attributes are specified on this copy
+  return
+}
+// CHECK-LABEL: func @copy
+//       CHECK:   llvm.call @linalg_copy_viewxxxf32_viewxxxf32(%{{.*}}, %{{.*}}) : (!llvm<"{ float*, i64, [3 x i64], [3 x i64] }*">, !llvm<"{ float*, i64, [3 x i64], [3 x i64] }*">) -> ()
index f576711..514d522 100644 (file)
@@ -45,6 +45,37 @@ extern "C" void linalg_fill_viewxf32_f32(ViewType<float, 1> *X, float f) {
     *(X->data + X->offset + i * X->strides[0]) = f;
 }
 
+extern "C" void linalg_fill_viewxxf32_f32(ViewType<float, 2> *X, float f) { // Stores f into every element of the 2-D view X.
+  for (unsigned i = 0; i < X->sizes[0]; ++i) // i indexes dimension 0
+    for (unsigned j = 0; j < X->sizes[1]; ++j) // j indexes dimension 1
+      *(X->data + X->offset + i * X->strides[0] + j * X->strides[1]) = f; // element (i, j) is at data[offset + i*strides[0] + j*strides[1]]
+}
+
+extern "C" void linalg_copy_viewf32_viewf32(ViewType<float, 0> *I, // Copies the single element of rank-0 view I
+                                            ViewType<float, 0> *O) { // into rank-0 view O.
+  O->data[O->offset] = I->data[I->offset]; // only the offset is used to locate the element in each view
+}
+
+extern "C" void linalg_copy_viewxf32_viewxf32(ViewType<float, 1> *I, // Copies 1-D view I element by element
+                                              ViewType<float, 1> *O) { // into 1-D view O.
+  assert(I->sizes[0] == O->sizes[0]); // both views must have the same length
+  for (unsigned i = 0; i < I->sizes[0]; ++i)
+    O->data[O->offset + i * O->strides[0]] = // each view is addressed with its own offset and stride
+        I->data[I->offset + i * I->strides[0]];
+}
+
+extern "C" void linalg_copy_viewxxf32_viewxxf32(ViewType<float, 2> *I, // Copies 2-D view I element by element
+                                                ViewType<float, 2> *O) { // into 2-D view O.
+  assert(I->sizes[0] == O->sizes[0]); // shapes must agree in dimension 0 ...
+  assert(I->sizes[1] == O->sizes[1]); // ... and in dimension 1
+  auto so0 = O->strides[0], so1 = O->strides[1]; // output strides, read once before the loops
+  auto si0 = I->strides[0], si1 = I->strides[1]; // input strides, read once before the loops
+  for (unsigned i = 0; i < I->sizes[0]; ++i)
+    for (unsigned j = 0; j < I->sizes[1]; ++j)
+      O->data[O->offset + i * so0 + j * so1] = // O(i, j) = I(i, j), each side using its own offset and strides
+          I->data[I->offset + i * si0 + j * si1];
+}
+
 extern "C" void linalg_dot_viewxf32_viewxf32_viewf32(ViewType<float, 1> *X,
                                                      ViewType<float, 1> *Y,
                                                      ViewType<float, 0> *Z) {
index 7741ce9..9c5d9aa 100644 (file)
@@ -2,6 +2,8 @@
 // RUN: mlir-opt %s -linalg-lower-to-loops -linalg-lower-to-llvm-dialect | mlir-cpu-runner -e dot -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
 // RUN: mlir-opt %s -linalg-lower-to-llvm-dialect | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
 // RUN: mlir-opt %s -linalg-lower-to-loops -linalg-lower-to-llvm-dialect | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
+// RUN: mlir-opt %s -linalg-tile -linalg-tile-sizes=2,3,4 -linalg-tile-promote-full-tile-views=true -linalg-lower-to-loops -linalg-lower-to-llvm-dialect | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
+// RUN: mlir-opt %s -linalg-tile -linalg-tile-sizes=2,3,4 -linalg-tile-promote-full-tile-views=true -linalg-lower-to-llvm-dialect | mlir-cpu-runner -e matmul -entry-point-result=f32 -shared-libs=%linalg_test_lib_dir/libcblas%shlibext,%linalg_test_lib_dir/libcblas_interface%shlibext | FileCheck %s
 
 // Creates and returns a 1-D buffer of size %s filled with the value %f
 func @alloc_filled_f32(%s : index, %f : f32) -> !linalg.buffer<?xf32> {