From: kon72 Date: Fri, 12 May 2023 14:42:12 +0000 (+0200) Subject: [mlir][linalg] Add channel-first variants of convolution X-Git-Tag: upstream/17.0.6~8705 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c8e056065898bba2d51fb655da40bd4ee7beedcf;p=platform%2Fupstream%2Fllvm.git [mlir][linalg] Add channel-first variants of convolution This change adds the following three operations and unit tests for them: - conv_3d_ncdhw_fcdhw - depthwise_conv_1d_ncw_cw - depthwise_conv_3d_ncdhw_cdhw Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D150054 --- diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml index cbe40fc..52ab6c7 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -2283,6 +2283,106 @@ structured_op: !LinalgStructuredOpConfig scalar_arg: KZp --- !LinalgOpConfig metadata: !LinalgOpMetadata + name: conv_3d_ncdhw_fcdhw + cpp_class_name: Conv3DNcdhwFcdhwOp + doc: |- + Performs 3-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13, s14] -> (s0, s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9, s10 * s11 + s12 + * s13)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13, s14] -> (s14, s1, s4, s8, s12)> + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13, s14] -> (s0, s14, s2, s6, s10)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, + s12, s13, s14] -> (s3, s7, s11)> + default_indices: + - 1 + - 1 + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, + s12, s13, s14] -> (s5, s9, s13)> + default_indices: + - 1 + - 1 + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6, + s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d8, d1 * s3 + d5 * s5, d2 * s7 + + d6 * s9, d3 * s11 + d7 * s13)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6, + s7, s8, s9, s10, s11, s12, s13, s14] -> (d4, d8, d5, d6, d7)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6, + s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d4, d1, d2, d3)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - parallel + - reduction + - reduction + - reduction + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: binary + fn_name: mul + operands: + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + scalar_fn: + kind: 
type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata name: depthwise_conv_1d_nwc_wc cpp_class_name: DepthwiseConv1DNwcWcOp doc: |- @@ -2366,6 +2466,89 @@ structured_op: !LinalgStructuredOpConfig scalar_arg: K --- !LinalgOpConfig metadata: !LinalgOpMetadata + name: depthwise_conv_1d_ncw_cw + cpp_class_name: DepthwiseConv1DNcwCwOp + doc: |- + Performs depth-wise 1-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. Multiplier is set to 1 + which is a special case for most depthwise convolutions. + implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s1, s4)> + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)> + default_indices: + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)> + default_indices: + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d1 * s3 + d3 + * s5)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d2, d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d1)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: binary + fn_name: mul + operands: + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata name: depthwise_conv_1d_nwc_wcm cpp_class_name: DepthwiseConv1DNwcWcmOp doc: |- @@ -3092,6 +3275,105 @@ structured_op: !LinalgStructuredOpConfig scalar_arg: K --- !LinalgOpConfig metadata: !LinalgOpMetadata + name: depthwise_conv_3d_ncdhw_cdhw + cpp_class_name: DepthwiseConv3DNcdhwCdhwOp + doc: |- + Performs depth-wise 3-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. Multiplier is set to 1 + which is a special case for most depthwise convolutions. 
+ implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13] -> (s0, s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9, s10 * s11 + s12 * s13)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13] -> (s1, s4, s8, s12)> + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, + s13] -> (s0, s1, s2, s6, s10)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, + s12, s13] -> (s3, s7, s11)> + default_indices: + - 1 + - 1 + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, + s12, s13] -> (s5, s9, s13)> + default_indices: + - 1 + - 1 + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, + s8, s9, s10, s11, s12, s13] -> (d0, d7, d1 * s3 + d4 * s5, d2 * s7 + d5 * s9, + d3 * s11 + d6 * s13)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, + s8, s9, s10, s11, s12, s13] -> (d7, d4, d5, d6)> + - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7, + s8, s9, s10, s11, s12, s13] -> (d0, d7, d1, d2, d3)> + iterator_types: + - parallel + - parallel + - parallel + - parallel + - reduction + - reduction + - reduction + - parallel + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: binary + fn_name: mul + operands: + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: I + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: K +--- !LinalgOpConfig +metadata: !LinalgOpMetadata name: depthwise_conv_3d_ndhwc_dhwcm cpp_class_name: DepthwiseConv3DNdhwcDhwcmOp doc: |- @@ -4722,4 +5004,3 @@ structured_op: !LinalgStructuredOpConfig scalar_const: '2.3283063999999999E-10 : f64' - !ScalarExpression scalar_arg: min - diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py index 4402624..9c96868 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -494,6 +494,33 @@ def conv_3d_ndhwc_dhwcf_q(I=TensorDef(T1, S.N, S.OD * S.SD + S.KD * S.DD, @linalg_structured_op +def conv_3d_ncdhw_fcdhw(I=TensorDef(T1, S.N, S.C, S.OD * S.SD + S.KD * S.DD, + S.OH * S.SH + S.KH * S.DH, + S.OW * S.SW + S.KW * S.DW), + K=TensorDef(T2, S.F, S.C, S.KD, S.KH, S.KW), + O=TensorDef(U, S.N, S.F, S.OD, S.OH, S.OW, output=True), + strides=IndexAttrDef(S.SD, + S.SH, + S.SW, + default=[1, 1, 1]), + dilations=IndexAttrDef(S.DD, + S.DH, + S.DW, + default=[1, 1, 1])): + """Performs 3-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. 
+ """ + implements(ConvolutionOpInterface) + domain(D.n, D.od, D.oh, D.ow, D.f, D.kd, D.kh, D.kw, D.c) + O[D.n, D.f, D.od, D.oh, D.ow] += TypeFn.cast_signed( + U, I[D.n, D.c, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH, + D.ow * S.SW + D.kw * S.DW]) * TypeFn.cast_signed( + U, K[D.f, D.c, D.kd, D.kh, D.kw]) + + +@linalg_structured_op def depthwise_conv_1d_nwc_wc(I=TensorDef(T1, S.N, S.OW * S.SW + S.KW * S.DW, S.IC), K=TensorDef(T2, S.KW, S.IC), @@ -514,6 +541,26 @@ def depthwise_conv_1d_nwc_wc(I=TensorDef(T1, S.N, S.OW * S.SW + S.KW * S.DW, @linalg_structured_op +def depthwise_conv_1d_ncw_cw(I=TensorDef(T1, S.N, S.IC, + S.OW * S.SW + S.KW * S.DW), + K=TensorDef(T2, S.IC, S.KW), + O=TensorDef(U, S.N, S.IC, S.OW, output=True), + strides=IndexAttrDef(S.SW, default=[1]), + dilations=IndexAttrDef(S.DW, default=[1])): + """Performs depth-wise 1-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. Multiplier is set to 1 + which is a special case for most depthwise convolutions. + """ + implements(ConvolutionOpInterface) + domain(D.n, D.ow, D.ic, D.kw) + O[D.n, D.ic, D.ow] += \ + TypeFn.cast_signed(U, I[D.n, D.ic, D.ow * S.SW + D.kw * S.DW]) * \ + TypeFn.cast_signed(U, K[D.ic, D.kw]) + + +@linalg_structured_op def depthwise_conv_1d_nwc_wcm(I=TensorDef(T1, S.N, S.OW * S.SW + S.KW * S.DW, S.IC), K=TensorDef(T2, S.KW, S.IC, S.CM), @@ -717,6 +764,41 @@ def depthwise_conv_3d_ndhwc_dhwc(I=TensorDef(T1, S.N, S.OD * S.SD + S.KD * S.DD, @linalg_structured_op +def depthwise_conv_3d_ncdhw_cdhw(I=TensorDef(T1, S.N, S.IC, + S.OD * S.SD + S.KD * S.DD, + S.OH * S.SH + S.KH * S.DH, + S.OW * S.SW + S.KW * S.DW), + K=TensorDef(T2, S.IC, S.KD, S.KH, S.KW), + O=TensorDef(U, + S.N, + S.IC, + S.OD, + S.OH, + S.OW, + output=True), + strides=IndexAttrDef(S.SD, + S.SH, + S.SW, + default=[1, 1, 1]), + dilations=IndexAttrDef(S.DD, + S.DH, + S.DW, + default=[1, 1, 1])): + """Performs depth-wise 3-D convolution. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. Multiplier is set to 1 + which is a special case for most depthwise convolutions. 
+ """ + implements(ConvolutionOpInterface) + domain(D.n, D.od, D.oh, D.ow, D.kd, D.kh, D.kw, D.ic) + O[D.n, D.ic, D.od, D.oh, D.ow] += TypeFn.cast_signed( + U, I[D.n, D.ic, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH, + D.ow * S.SW + D.kw * S.DW]) * TypeFn.cast_signed( + U, K[D.ic, D.kd, D.kh, D.kw]) + + +@linalg_structured_op def depthwise_conv_3d_ndhwc_dhwcm(I=TensorDef(T1, S.N, S.OD * S.SD + S.KD * S.DD, S.OH * S.SH + S.KH * S.DH, @@ -749,6 +831,7 @@ def depthwise_conv_3d_ndhwc_dhwcm(I=TensorDef(T1, S.N, D.ow * S.SW + D.kw * S.DW, D.ic]) * TypeFn.cast_signed( U, K[D.kd, D.kh, D.kw, D.ic, D.cm]) + @linalg_structured_op def pooling_nhwc_sum(I=TensorDef(T1, S.N, S.OH * S.SH + S.KH * S.DH, S.OW * S.SW + S.KW * S.DW, S.C), diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir index f5e266e..e89fb8d 100644 --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -28,6 +28,20 @@ func.func @depthwise_conv_1d_nwc_wc(%input: tensor<1x12x8xf32>, %filter: tensor< // ----- +// CHECK-LABEL: func @depthwise_conv_1d_ncw_cw +func.func @depthwise_conv_1d_ncw_cw(%input: tensor<1x8x12xf32>, %filter: tensor<8x3xf32>) -> tensor<1x8x10xf32> { + %zero = arith.constant 0.000000e+00 : f32 + %init = tensor.empty() : tensor<1x8x10xf32> + %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x8x10xf32>) -> tensor<1x8x10xf32> + // CHECK: depthwise_conv_1d_ncw_cw + %0 = linalg.depthwise_conv_1d_ncw_cw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %filter : tensor<1x8x12xf32>, tensor<8x3xf32>) + outs(%fill : tensor<1x8x10xf32>) -> tensor<1x8x10xf32> + return %0 : tensor<1x8x10xf32> +} + +// ----- + // CHECK-LABEL: func @depthwise_conv_2d_nhwc_hwcm_tensor func.func @depthwise_conv_2d_nhwc_hwcm_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> { %zero = arith.constant 0.000000e+00 : f32 @@ -221,6 +235,20 @@ func.func @depthwise_conv_3d_ndhwc_dhwc(%input: tensor<2x6x13x12x6xf32>, %filter // ----- +// CHECK-LABEL: func @depthwise_conv_3d_ncdhw_cdhw +func.func @depthwise_conv_3d_ncdhw_cdhw(%input: tensor<2x6x6x13x12xf32>, %filter: tensor<6x2x1x3xf32>) -> tensor<2x6x3x13x4xf32> { + %zero = arith.constant 0.000000e+00 : f32 + %init = tensor.empty() : tensor<2x6x3x13x4xf32> + %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x6x3x13x4xf32>) -> tensor<2x6x3x13x4xf32> + // CHECK: depthwise_conv_3d_ncdhw_cdhw + %0 = linalg.depthwise_conv_3d_ncdhw_cdhw {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>} + ins(%input, %filter : tensor<2x6x6x13x12xf32>, tensor<6x2x1x3xf32>) + outs(%fill : tensor<2x6x3x13x4xf32>) -> tensor<2x6x3x13x4xf32> + return %0 : tensor<2x6x3x13x4xf32> +} + +// ----- + // CHECK-LABEL: func @conv_1d_nwc_wcf func.func @conv_1d_nwc_wcf(%input: tensor, %filter: tensor, %init: tensor) -> tensor { // CHECK: %{{.+}} = linalg.conv_1d_nwc_wcf @@ -413,6 +441,38 @@ func.func @conv_3d_ndhwc_dhwcf(%input: memref, %filter: memref, %filter: tensor, %init: tensor) -> tensor { + // CHECK: %{{.+}} = linalg.conv_3d_ncdhw_fcdhw + // CHECK-SAME: dilations = dense<1> : tensor<3xi64> + // CHECK-SAME: strides = dense<1> : tensor<3xi64> + // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) + // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor + %0 = linalg.conv_3d_ncdhw_fcdhw {dilations = dense<1> : tensor<3xi64>, + strides = dense<1> : tensor<3xi64>} + ins (%input, %filter: tensor, tensor) + outs (%init: tensor) -> tensor + 
return %0 : tensor +} + +// ----- + +// CHECK-LABEL: func @conv_3d_ncdhw_fcdhw +func.func @conv_3d_ncdhw_fcdhw(%input: memref, %filter: memref, %output: memref) { + // CHECK: linalg.conv_3d_ncdhw_fcdhw + // CHECK-SAME: dilations = dense<1> : tensor<3xi64> + // CHECK-SAME: strides = dense<1> : tensor<3xi64> + // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) + // CHECK-SAME: outs(%{{.+}} : memref) + linalg.conv_3d_ncdhw_fcdhw {dilations = dense<1> : tensor<3xi64>, + strides = dense<1> : tensor<3xi64>} + ins (%input, %filter: memref, memref) + outs (%output: memref) + return +} + +// ----- + // CHECK-LABEL: func @pooling_nhwc_sum_tensor // CHECK: %{{.+}} = linalg.pooling_nhwc_sum // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
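
For reference, a minimal static-shape use of the new channel-first 3-D convolution might look like the snippet below. This is a sketch only, not part of the committed tests; the concrete shapes are hypothetical and chosen so that, with unit strides and dilations, each output spatial dimension equals input - kernel + 1.

// Hypothetical example: NCDHW input, FCDHW filter, NFDHW output.
func.func @conv_3d_ncdhw_fcdhw_static(%input: tensor<1x2x8x8x8xf32>,
                                      %filter: tensor<4x2x3x3x3xf32>) -> tensor<1x4x6x6x6xf32> {
  %zero = arith.constant 0.000000e+00 : f32
  %init = tensor.empty() : tensor<1x4x6x6x6xf32>
  %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x4x6x6x6xf32>) -> tensor<1x4x6x6x6xf32>
  // With strides = dilations = [1, 1, 1], each output spatial dim is 8 - 3 + 1 = 6;
  // the input channel dim C = 2 is reduced over, and F = 4 becomes the output channel dim.
  %0 = linalg.conv_3d_ncdhw_fcdhw {dilations = dense<1> : tensor<3xi64>,
                                   strides = dense<1> : tensor<3xi64>}
         ins(%input, %filter : tensor<1x2x8x8x8xf32>, tensor<4x2x3x3x3xf32>)
         outs(%fill : tensor<1x4x6x6x6xf32>) -> tensor<1x4x6x6x6xf32>
  return %0 : tensor<1x4x6x6x6xf32>
}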