2 // Copyright (c) 2016-2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 #include "convolution_kernel_selector.h"
18 #include "convolution_kernel_bfyx_ref.h"
19 #include "convolution_kernel_bfyx_1x1_opt.h"
20 #include "convolution_kernel_bfyx_gemm_like.h"
21 #include "convolution_kernel_bfyx_direct_10_12_16.h"
22 #include "convolution_kernel_bfyx_os_iyx_osv16.h"
23 #include "convolution_kernel_bfyx_os_iyx_osv16_2_sg.h"
24 #include "convolution_kernel_yxfb_ref.h"
25 #include "convolution_kernel_yxfb_yxio_b16.h"
26 #include "convolution_kernel_yxfb_yxio_b8.h"
27 #include "convolution_kernel_yxfb_yxio_b1_block.h"
28 #include "convolution_kernel_yxfb_yxio_b1_block_multiple_x.h"
29 #include "convolution_kernel_tutorial.h"
30 //#include "convolution_kernel_bfyx_3x3_dw_opt.h"
31 #include "convolution_kernel_winograd_2x3_s1.h"
32 #include "convolution_kernel_bfyx_1x1.h"
33 #include "convolution_kernel_bfyx_1x1_gemm_buf.h"
34 #include "convolution_kernel_winograd_2x3_s1_fused.h"
35 #include "convolution_kernel_winograd_6x3_s1_fused.h"
36 #include "convolution_kernel_MMAD.h"
37 #include "convolution_kernel_MMAD_blocks.h"
38 #include "convolution_kernel_1x1_gemm_MMAD.h"
39 #include "convolution_kernel_byxf_af32_depthwise.h"
40 #include "convolution_kernel_mmad_batched.h"
41 #include "convolution_kernel_bfyx_depthwise_weights_lwg.h"
42 #include "convolution_kernel_mmad_slm_2x14_rep4.h"
43 #include "convolution_kernel_mmad_slm_7x7_rep4.h"
44 #include "convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h"
45 #include "convolution_kernel_mmad_batched_block.h"
46 #include "convolution_kernel_mmad_batched_block_1x1.h"
47 #include "convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
48 #include "convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
49 #include "convolution_kernel_mmad_32x32sg_slm_int8.h"
50 #include "convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h"
51 #include "convolution_kernel_imad_3x3.h"
52 #include "convolution_kernel_imad_1x1.h"
53 #include "convolution_kernel_imad_7x7.h"
55 namespace kernel_selector
// Constructor: issues one Attach<...>() call for every convolution kernel
// implementation type listed below, in exactly the order written here.
// Entries that are commented out are not attached.
// NOTE(review): Attach is not defined in this file; presumably it registers the
// implementation type as a candidate for this selector -- confirm in the base class.
// NOTE(review): whether the attach order influences which kernel is picked
// cannot be determined from this file; keep the order unless that is confirmed.
57 convolution_kernel_selector::convolution_kernel_selector()
59 Attach<ConvolutionKernel_bfyx_Ref>();
60 Attach<convolution_kernel_bfyx_1x1_opt>();
61 Attach<ConvolutionKernel_bfyx_GEMMLike>();
// NOTE(review): the class name ends in 10_10_12 while the included header is
// convolution_kernel_bfyx_direct_10_12_16.h; presumably this matches the
// declaration in that header -- confirm before renaming anything.
62 Attach<ConvolutionKernel_bfyx_Direct_10_10_12>();
63 Attach<ConvolutionKernel_bfyx_os_iyx_osv16>();
64 // commented out to not get in our way, will enable in future after autotuning
65 // Attach<ConvolutionKernel_bfyx_os_iyx_osv16_2_sg>();
66 Attach<ConvolutionKernel_yxfb_Ref>();
67 Attach<ConvolutionKernel_yxfb_yxio_b16>();
68 Attach<ConvolutionKernel_yxfb_yxio_b8>();
69 //Attach<ConvolutionKernel_yxfb_yxio_b1_block>(); // TODO: need to finish integration
// NOTE(review): "mulitple" is presumably the spelling used by the class
// declaration in the included header; verify there before correcting it here.
70 Attach<ConvolutionKernel_yxfb_yxio_b1_block_mulitple_x>();
71 //Attach<ConvolutionKernel_bfyx_3x3_dw_opt>();
72 Attach<ConvolutionKernel_Winograd_2x3_s1>();
73 Attach<ConvolutionKernel_Winograd_2x3_s1_fused>();
74 Attach<ConvolutionKernel_Winograd_6x3_s1_fused>();
75 Attach<ConvolutionKernel_bfyx_1x1>();
76 Attach<ConvolutionKernel_bfyx_1x1_gemm_buf>();
77 Attach<ConvolutionKernel_MMAD>();
78 Attach<ConvolutionKernel_MMAD_blocks>();
79 Attach<ConvolutionKernel_1x1_gemm_MMAD>();
// NOTE(review): "depthiwise" is presumably the spelling used by the class
// declaration in the included header; verify there before correcting it here.
80 Attach<ConvolutionKernel_byxf_af32_depthiwise>();
81 Attach<ConvolutionKernel_mmad_batched>();
82 Attach<ConvolutionKernel_bfyx_depthwise_weights_lwg>();
83 Attach<ConvolutionKernel_mmad_slm_2x14_rep4>();
84 Attach<ConvolutionKernel_mmad_slm_7x7_rep4>();
85 Attach<ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8>();
86 Attach<ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8>();
87 Attach<ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32>();
88 Attach<ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32>();
89 Attach<ConvolutionKernel_mmad_batched_block>();
90 Attach<ConvolutionKernel_mmad_batched_block_1x1>();
// The next entry is disabled; no reason is recorded in this file, although its
// header (convolution_kernel_mmad_32x32sg_slm_int8.h) is still included.
91 // Attach<ConvolutionKernel_mmad_32x32sg_slm_int8>();
92 //Attach<ConvolutionKernel_Tutorial>(); //In order to use this implementation for tutorial purposes please uncomment this line
93 Attach<ConvolutionKernel_imad_3x3>();
94 Attach<ConvolutionKernel_imad_1x1>();
95 Attach<ConvolutionKernel_imad_7x7>();
// Returns the KernelsData produced for the given parameters.
// params and options are forwarded unchanged to GetAutoTuneBestKernel together
// with KernelType::CONVOLUTION, and that call's result is returned as-is.
// NOTE(review): GetAutoTuneBestKernel is defined outside this file; how it
// ranks or tunes the attached kernels is not visible here -- see its definition.
98 KernelsData convolution_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
100 return GetAutoTuneBestKernel(params, options, KernelType::CONVOLUTION);