Add a section of how to link IE with CMake project (#99)
[platform/upstream/dldt.git] / inference-engine / thirdparty / mkl-dnn / src / cpu / jit_uni_x8s8s32x_1x1_conv_kernel.hpp
1 /*******************************************************************************
2 * Copyright 2018 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16
17 #ifndef JIT_UNI_X8S8S32X_1x1_CONV_KERNEL_HPP
18 #define JIT_UNI_X8S8S32X_1x1_CONV_KERNEL_HPP
19
20 #include "c_types_map.hpp"
21 #include "type_helpers.hpp"
22 #include "jit_generator.hpp"
23 #include "jit_primitive_conf.hpp"
24
25 namespace mkldnn {
26 namespace impl {
27 namespace cpu {
28
// Short names for Xbyak register types. Reg64 and Xmm are used unqualified by
// the kernel struct declared in this header.
// NOTE(review): these using-declarations sit at namespace scope in a header, so
// they are visible inside mkldnn::impl::cpu for every file that includes it.
29 using Xbyak::Reg64;
30 using Xbyak::Ymm;
31 using Xbyak::Xmm;
32
// Forward 1x1 convolution JIT kernel declaration, parameterised on the target
// ISA and built on top of jit_generator.
//
// Only the constructor is defined in this header; every other member function
// is declared here and defined elsewhere.
33 template <cpu_isa_t isa>
34 struct jit_uni_x8s8s32x_1x1_conv_fwd_kernel: public jit_generator {
       // Macro defined outside this header.
       // NOTE(review): presumably injects name/source-file helper functions for
       // this kernel type -- confirm against its definition.
35     DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_x8s8s32x_1x1_conv_fwd_kernel)
36
       // Builds the kernel.
       //   ajcp - configuration, copied into the `jcp` member.
       //   attr - primitive attributes; only a reference is kept (see `attr_`),
       //          so the caller's object must outlive this kernel instance.
       // Members are initialised in declaration order (jcp, attr_, then the
       // register members further down) before the body runs, so generate()
       // sees all of them set.
37     jit_uni_x8s8s32x_1x1_conv_fwd_kernel(jit_1x1_conv_conf_t ajcp,
38         const primitive_attr_t &attr): jcp(ajcp), attr_(attr)
39     {
40         this->generate();
           // Cache whatever getCode() returns as a callable that takes a
           // jit_1x1_conv_call_s*. The C-style cast performs the
           // pointer-to-function conversion in one step.
41         jit_ker = (void (*)(jit_1x1_conv_call_s *))this->getCode();
42     }
43
       // Declared here, defined elsewhere.
       // NOTE(review): by its name this presumably reports whether the post-ops
       // in `attr` are supported for `jcp` -- confirm in the implementation.
       // Note that `jcp` is taken by non-const reference.
44     static bool post_ops_ok(jit_1x1_conv_conf_t &jcp,
45                             const primitive_attr_t &attr);
       // Declared here, defined elsewhere. Takes the configuration to fill
       // (`jcp`, non-const reference), the convolution descriptor, memory
       // descriptor wrappers for source, weights, destination and bias, and the
       // primitive attributes. Returns a status_t.
       // `with_relu` defaults to false and `relu_negative_slope` defaults to 0.f.
       // NOTE(review): the exact validation rules and status values are not
       // visible from this header.
46     static status_t init_conf(jit_1x1_conv_conf_t &jcp,
47                               const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
48                               const memory_desc_wrapper &weights_d,
49                               const memory_desc_wrapper &dst_d,
50                               const memory_desc_wrapper &bias_pd,
51                               const primitive_attr_t &attr,
52                               bool with_relu = false, float relu_negative_slope = 0.f);
53
       // Own copy of the configuration passed to the constructor.
54     jit_1x1_conv_conf_t jcp;
       // Reference to the caller-owned attributes (not a copy). It also makes
       // the struct non-assignable.
55     const primitive_attr_t &attr_;
       // Entry point set by the constructor from getCode().
56     void (*jit_ker)(jit_1x1_conv_call_s *);
57
58 private:
       // Vector register type chosen from `isa`.
       // NOTE(review): utils::conditional3 is defined elsewhere; read as a
       // three-way type select this yields Xmm for sse42, Ymm for avx2 and Zmm
       // otherwise -- confirm.
59     using Vmm = typename utils::conditional3<isa == sse42, Xbyak::Xmm,
60             isa == avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
61
       // General-purpose register assignments. rsi, rbx, r12, ... and
       // abi_not_param1 are names that come from outside this header
       // (presumably from jit_generator / Xbyak -- confirm).
62     Reg64 reg_weight_data = rsi;
63     Reg64 reg_src_data = abi_not_param1;
64     Reg64 reg_dst_data = rbx;
65     Reg64 reg_bias_data = r12;
66
       // reg_scales and aux_reg_src_data are both bound to rdx.
       // NOTE(review): presumably the two are never live at the same time --
       // verify in generate() before touching either.
67     Reg64 reg_scales = rdx;
68     Reg64 aux_reg_src_data = rdx;
69     Reg64 aux_reg_weight_data = rax;
70     Reg64 aux_reg_dst_data = rbp;
71     Reg64 reg_oc_loop_work = r9;
72     Reg64 reg_ow_loop_work = r10;
73     Reg64 reg_loop_os_iter = r14;
74     Reg64 reg_loop_ic_iter = r15;
75
       // Shares r14 with reg_loop_os_iter above.
       // NOTE(review): same caveat as for rdx -- check the live ranges.
76     Reg64 reg_scratch = r14;
77
       // Vector register assignments. Indices are reused across names:
       //   15 -> vreg_sum_0 and vmm_bias
       //   14 -> vreg_src   and vmm_zero
       //   13 -> vmm_one (Vmm width) and xmm_one (Xmm view of the same index)
       // NOTE(review): the sharing looks deliberate, but the non-overlap of
       // uses can only be confirmed in the code generator.
78     Vmm vreg_sum_0 = Vmm(15);
79     Vmm vreg_src = Vmm(14);
80     Vmm vmm_bias = Vmm(15);
81     Vmm vmm_zero = Vmm(14);
82     Vmm vmm_one = Vmm(13);
83     Xmm xmm_one = Xmm(13);
84
       // Code-generation helpers; declared here, defined elsewhere.
85     void loop_os(int oc_loop_blk);
86     void ic_loop(int oc_loop_blk, int ur);
87
       // Called exactly once from the constructor, before getCode().
88     void generate();
89
       // NOTE(review): the meaning of `position` and of the boolean result is
       // not visible from this header.
90     bool maybe_relu(int position);
       // NOTE(review): by its name this presumably emits a conversion of `op`
       // (with element type `type_in`) to packed single-precision floats in
       // `vmm_in` -- confirm.
91     void cvt2ps(data_type_t type_in, Vmm vmm_in, const Xbyak::Operand &op);
92 };
93
94 }
95 }
96 }
97
98 #endif