<!-- Custom layer "PriorBoxClustered" (type SimpleGPU, version 1). Names the kernel entry point prior_box_clustered and its source file prior_box_clustered.cl. -->
1 <CustomLayer name="PriorBoxClustered" type="SimpleGPU" version="1">
2 <Kernel entry="prior_box_clustered">
3 <!-- The source path must be relative to the executable, so it is written to match where the file is actually placed in CI. -->
4 <Source filename="prior_box_clustered.cl"/>
<!-- Each Define pairs a name (layer parameter name plus a trailing underscore) with a layer param, a type and a default value. Presumably the name is what the kernel source sees and the default applies when the layer omits the param: confirm against the plugin's configuration-file documentation. -->
5 <Define name="min_size_" type="float" param="min_size" default="0.0"/>
6 <Define name="max_size_" type="float" param="max_size" default="0.0"/>
7 <Define name="aspect_ratio_" type="float" param="aspect_ratio" default="0.0"/>
8 <Define name="flip_" type="int" param="flip" default="1"/>
9 <Define name="clip_" type="int" param="clip" default="0"/>
<!-- variance, width and height are declared as float[] while their defaults hold a single value. -->
10 <Define name="variance_" type="float[]" param="variance" default="0.1"/>
11 <Define name="img_size_" type="int" param="img_size" default="0"/>
12 <Define name="img_h_" type="int" param="img_h" default="0"/>
13 <Define name="img_w_" type="int" param="img_w" default="0"/>
14 <Define name="step_" type="float" param="step" default="0.0"/>
15 <Define name="step_w_" type="float" param="step_w" default="0.0"/>
16 <Define name="step_h_" type="float" param="step_h" default="0.0"/>
17 <Define name="offset_" type="float" param="offset" default="0.0"/>
18 <Define name="width_" type="float[]" param="width" default="0.0"/>
19 <Define name="height_" type="float[]" param="height" default="0.0"/>
<!-- Kernel arguments 0 and 1 are inputs taken from ports 0 and 1; argument 2 is the output on port 0. All three are declared with format BFYX. -->
22 <Tensor arg-index="0" type="input" port-index="0" format="BFYX"/>
23 <Tensor arg-index="1" type="input" port-index="1" format="BFYX"/>
24 <Tensor arg-index="2" type="output" port-index="0" format="BFYX"/>
27 <CompilerOptions options="-cl-mad-enable"/>
<!-- Two-component global work size Y,X with dim="input"; presumably Y and X are read from the input tensor's dimensions: confirm. -->
28 <WorkSizes dim="input" global="Y,X"/>
29 <!-- Defines the global work size. The formulas may use the values of the B, F, Y and X dimensions and the operators +, -, /, *, % (all evaluated in integer arithmetic). Default value: global="B*F*Y*X,1,1". -->
<!-- Custom layer "Interp" (type SimpleGPU, version 1). Names the kernel entry point interp and its source file interp.cl. -->
32 <CustomLayer name="Interp" type="SimpleGPU" version="1">
33 <Kernel entry="interp">
34 <Source filename="interp.cl"/>
<!-- Integer padding parameters pad_beg and pad_end, both defaulting to 0, exposed under names with a trailing underscore. -->
35 <Define name="pad_beg_" type="int" param="pad_beg" default="0"/>
36 <Define name="pad_end_" type="int" param="pad_end" default="0"/>
<!-- Argument 0 is the input on port 0 and argument 1 is the output on port 0. Both are declared with format ANY rather than a fixed layout. -->
39 <Tensor arg-index="0" type="input" port-index="0" format="ANY"/>
40 <Tensor arg-index="1" type="output" port-index="0" format="ANY"/>
43 <CompilerOptions options="-cl-mad-enable"/>
<!-- With integer division, ((X + 31)/32)*32 rounds X up to the next multiple of 32, so the second global dimension is divisible by the local size of 32. NOTE(review): the kernel presumably has to guard against indices at or beyond X: confirm in interp.cl. -->
44 <WorkSizes global="Y,((X + 31)/32)*32" local="1,32"/>
<!-- Custom layer "GRN" (type SimpleGPU, version 1) with source file grn.cl. -->
47 <CustomLayer name="GRN" type="SimpleGPU" version="1">
49 <Source filename="grn.cl"/>
<!-- bias_ takes the layer's bias param and defaults to the literal 1.0f. -->
50 <Define name="bias_" type="float" param="bias" default="1.0f"/>
<!-- ACCUMULATOR_TYPE has no type or param attribute; it only carries the fixed value float. -->
51 <Define name="ACCUMULATOR_TYPE" default="float"/>
<!-- Argument 0 is the input on port 0 and argument 1 is the output on port 0, both declared with format BFYX. -->
54 <Tensor arg-index="0" type="input" port-index="0" format="BFYX"/>
55 <Tensor arg-index="1" type="output" port-index="0" format="BFYX"/>
58 <CompilerOptions options="-cl-mad-enable"/>
<!-- Two-component global work size B,Y. NOTE(review): F and X do not appear here, so presumably the kernel iterates over them itself: confirm in grn.cl. -->
59 <WorkSizes global="B,Y"/>
<!-- Custom layer "Tile" (type SimpleGPU, version 1) with source file tile.cl. -->
62 <CustomLayer name="Tile" type="SimpleGPU" version="1">
64 <Source filename="tile.cl"/>
65 <Define name="axis_" type="int" param="axis" default="1"/>
<!-- Unlike axis_, tiles_ declares no default attribute. NOTE(review): presumably the layer must always supply tiles: confirm how the plugin treats a missing value. -->
66 <Define name="tiles_" type="int" param="tiles"/>
<!-- Argument 0 is the input on port 0 and argument 1 is the output on port 0, both declared with format BFYX. -->
69 <Tensor arg-index="0" type="input" port-index="0" format="BFYX"/>
70 <Tensor arg-index="1" type="output" port-index="0" format="BFYX"/>
73 <CompilerOptions options="-cl-mad-enable"/>
<!-- Constant global work size of 1 in each of three dimensions. NOTE(review): presumably a single work-item performs the whole operation: confirm in tile.cl. -->
74 <WorkSizes global="1,1,1"/>
<!-- Custom layer "CTCGreedyDecoder" (type SimpleGPU, version 1). Names the kernel entry point ctc_greedy_decoder and its source file ctc_greedy_decoder.cl. -->
77 <CustomLayer name="CTCGreedyDecoder" type="SimpleGPU" version="1">
78 <Kernel entry="ctc_greedy_decoder">
79 <Source filename="ctc_greedy_decoder.cl"/>
<!-- Integer parameter ctc_merge_repeated, defaulting to 1. -->
80 <Define name="ctc_merge_repeated_" type="int" param="ctc_merge_repeated" default="1"/>
<!-- Kernel arguments 0 and 1 are inputs taken from ports 0 and 1; argument 2 is the output on port 0. All three are declared with format BFYX. -->
83 <Tensor arg-index="0" type="input" port-index="0" format="BFYX"/>
84 <Tensor arg-index="1" type="input" port-index="1" format="BFYX"/>
85 <Tensor arg-index="2" type="output" port-index="0" format="BFYX"/>
88 <CompilerOptions options="-cl-mad-enable"/>
<!-- Constant two-component global work size of 1,1. NOTE(review): presumably a single work-item performs the whole decode: confirm in ctc_greedy_decoder.cl. -->
89 <WorkSizes global="1,1"/>