Publishing R3
[platform/upstream/dldt.git] / inference-engine / src / cldnn_engine / cldnn_global_custom_kernels / cldnn_global_custom_kernels.xml
1 <CustomLayer name="PriorBoxClustered" type="SimpleGPU" version="1"> <!-- Custom layer "PriorBoxClustered", bound to the prior_box_clustered kernel entry point. -->
2     <Kernel entry="prior_box_clustered"> <!-- NOTE(review): each Define presumably exposes the layer parameter named by "param" to the kernel under "name", using "default" when the parameter is absent; confirm against the loader. -->
3         <!-- The source path must be relative to the executable, matching where the file is actually placed in CI. -->
4         <Source filename="prior_box_clustered.cl"/>
5         <Define name="min_size_" type="float" param="min_size" default="0.0"/>
6         <Define name="max_size_" type="float" param="max_size" default="0.0"/>
7         <Define name="aspect_ratio_" type="float" param="aspect_ratio" default="0.0"/>
8         <Define name="flip_" type="int" param="flip" default="1"/>
9         <Define name="clip_" type="int" param="clip" default="0"/>
10         <Define name="variance_" type="float[]" param="variance" default="0.1"/>
11         <Define name="img_size_" type="int" param="img_size" default="0"/>
12         <Define name="img_h_" type="int" param="img_h" default="0"/>
13         <Define name="img_w_" type="int" param="img_w" default="0"/>
14         <Define name="step_" type="float" param="step" default="0.0"/>
15         <Define name="step_w_" type="float" param="step_w" default="0.0"/>
16         <Define name="step_h_" type="float" param="step_h" default="0.0"/>
17         <Define name="offset_" type="float" param="offset" default="0.0"/>
18         <Define name="width_" type="float[]" param="width" default="0.0"/>
19         <Define name="height_" type="float[]" param="height" default="0.0"/>
20     </Kernel>
21     <Buffers>
22         <Tensor arg-index="0" type="input" port-index="0"  format="BFYX"/>
23         <Tensor arg-index="1" type="input" port-index="1"  format="BFYX"/>
24         <Tensor arg-index="2" type="output" port-index="0" format="BFYX"/>
25     </Buffers>
26     <!-- Buffers above: kernel arguments 0 and 1 are the inputs from ports 0 and 1, argument 2 is the output on port 0; all three use the BFYX format. -->
27     <CompilerOptions options="-cl-mad-enable"/>
28     <WorkSizes dim="input" global="Y,X"/>
29     <!-- The WorkSizes element above defines the global work size. Its formulas can use the values of the B,F,Y,X dimensions and the operators +,-,/,*,% (all evaluated in integer arithmetic). Default value: global="B*F*Y*X,1,1". NOTE(review): dim="input" presumably takes B,F,Y,X from the input tensor rather than the output; confirm. -->
30 </CustomLayer>
31
32 <CustomLayer name="Interp" type="SimpleGPU" version="1"> <!-- Custom layer "Interp", bound to the interp kernel entry point in interp.cl. -->
33     <Kernel entry="interp">
34         <Source filename="interp.cl"/>
35         <Define name="pad_beg_" type="int" param="pad_beg" default="0"/>
36         <Define name="pad_end_" type="int" param="pad_end" default="0"/>
37     </Kernel>
38     <Buffers>
39         <Tensor arg-index="0" type="input" port-index="0"  format="ANY"/>
40         <Tensor arg-index="1" type="output" port-index="0" format="ANY"/>
41     </Buffers>
42     <!-- WorkSizes below: ((X + 31)/32)*32 rounds X up to the next multiple of 32 under integer division, so the second global dimension is a whole multiple of the local size 32. NOTE(review): the kernel presumably bounds-checks the padded work-items; confirm in interp.cl. -->
43     <CompilerOptions options="-cl-mad-enable"/>
44     <WorkSizes global="Y,((X + 31)/32)*32" local="1,32"/>
45 </CustomLayer>
46
47 <CustomLayer name="GRN" type="SimpleGPU" version="1"> <!-- Custom layer "GRN", bound to the grn kernel entry point in grn.cl. -->
48     <Kernel entry="grn">
49         <Source filename="grn.cl"/>
50         <Define name="bias_" type="float" param="bias" default="1.0f"/>
51         <Define name="ACCUMULATOR_TYPE" default="float"/> <!-- No "param" or "type" attribute here: only the fixed value "float" is supplied. -->
52     </Kernel>
53     <Buffers>
54         <Tensor arg-index="0" type="input" port-index="0"  format="BFYX"/>
55         <Tensor arg-index="1" type="output" port-index="0" format="BFYX"/>
56     </Buffers>
57     <!-- Buffers above: kernel argument 0 is the input on port 0 and argument 1 is the output on port 0, both BFYX. WorkSizes below uses a two-dimensional global size of B by Y. -->
58     <CompilerOptions options="-cl-mad-enable"/>
59     <WorkSizes global="B,Y"/>
60 </CustomLayer>
61
62 <CustomLayer name="Tile" type="SimpleGPU" version="1"> <!-- Custom layer "Tile", bound to the tile kernel entry point in tile.cl. -->
63     <Kernel entry="tile">
64         <Source filename="tile.cl"/>
65         <Define name="axis_" type="int" param="axis" default="1"/>
66         <Define name="tiles_" type="int" param="tiles"/> <!-- NOTE(review): unlike axis_, no default is given, so the layer presumably must always supply "tiles"; confirm. -->
67     </Kernel>
68     <Buffers>
69         <Tensor arg-index="0" type="input" port-index="0"  format="BFYX"/>
70         <Tensor arg-index="1" type="output" port-index="0" format="BFYX"/>
71     </Buffers>
72     <!-- WorkSizes below requests a 1x1x1 global size, i.e. a single work-item. -->
73     <CompilerOptions options="-cl-mad-enable"/>
74     <WorkSizes global="1,1,1"/>
75 </CustomLayer>
76
77 <CustomLayer name="CTCGreedyDecoder" type="SimpleGPU" version="1"> <!-- Custom layer "CTCGreedyDecoder", bound to the ctc_greedy_decoder kernel entry point in ctc_greedy_decoder.cl. -->
78     <Kernel entry="ctc_greedy_decoder">
79         <Source filename="ctc_greedy_decoder.cl"/>
80         <Define name="ctc_merge_repeated_" type="int" param="ctc_merge_repeated" default="1"/>
81     </Kernel>
82     <Buffers>
83         <Tensor arg-index="0" type="input" port-index="0"  format="BFYX"/>
84         <Tensor arg-index="1" type="input" port-index="1"  format="BFYX"/>
85         <Tensor arg-index="2" type="output" port-index="0" format="BFYX"/>
86     </Buffers>
87     <!-- Buffers above: kernel arguments 0 and 1 are the inputs from ports 0 and 1, argument 2 is the output on port 0; all BFYX. WorkSizes below requests a 1x1 global size, i.e. a single work-item. -->
88     <CompilerOptions options="-cl-mad-enable"/>
89     <WorkSizes global="1,1"/>
90 </CustomLayer>