1 // Copyright (C) 2018-2019 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
19 #define _CAT(a,b) a##b
20 #define CAT(a,b) _CAT(a,b)
22 inline void interpolate(const int N, const int C,
23 const __global INPUT0_TYPE* src, const int x1, const int y1,
24 const int IH_pad, const int IW_pad, const int IH, const int IW,
25 __global OUTPUT0_TYPE* dst, const int x2, const int y2,
26 const int OH_pad, const int OW_pad, const int OH, const int OW)
32 rh = (OH_pad > 1) ? (IH_pad - 1) / (INPUT0_TYPE)(OH_pad - 1) : (INPUT0_TYPE)0.0f;
33 rw = (OW_pad > 1) ? (IW_pad - 1) / (INPUT0_TYPE)(OW_pad - 1) : (INPUT0_TYPE)0.0f;
35 rh = (IH_pad) / (INPUT0_TYPE)(OH_pad);
36 rw = (IW_pad) / (INPUT0_TYPE)(OW_pad);
39 int h = get_global_id(0);
40 int w = get_global_id(1);
45 INPUT0_TYPE fh = rh * (INPUT0_TYPE)h;
47 int ih1 = (ih0 < IH_pad - 1) ? ih0+1 : ih0;
49 INPUT0_TYPE h_lambda0 = fh - ih0;
50 INPUT0_TYPE h_lambda1 = (INPUT0_TYPE)(1.0f) - h_lambda0;
51 INPUT0_TYPE fw = rw * w;
53 int iw1 = (iw0 < IW_pad - 1) ? iw0 + 1 : iw0;
55 INPUT0_TYPE w_lambda0 = fw - iw0;
56 INPUT0_TYPE w_lambda1 = (INPUT0_TYPE)(1.0f) - w_lambda0;
58 const __global INPUT0_TYPE* psrc00 = src + (y1 + ih0)*INPUT0_PITCHES[2] + (x1 + iw0)*INPUT0_PITCHES[3];
59 const __global INPUT0_TYPE* psrc01 = src + (y1 + ih0)*INPUT0_PITCHES[2] + (x1 + iw1)*INPUT0_PITCHES[3];
60 const __global INPUT0_TYPE* psrc10 = src + (y1 + ih1)*INPUT0_PITCHES[2] + (x1 + iw0)*INPUT0_PITCHES[3];
61 const __global INPUT0_TYPE* psrc11 = src + (y1 + ih1)*INPUT0_PITCHES[2] + (x1 + iw1)*INPUT0_PITCHES[3];
63 __global OUTPUT0_TYPE* pdst = dst + (y2 + h)*OUTPUT0_PITCHES[2] + (x2 + w)*OUTPUT0_PITCHES[3];
65 #if defined(INPUT0_FORMAT_YXFB) && defined(OUTPUT0_FORMAT_YXFB)
66 typedef CAT(INPUT0_TYPE, VEC_SIZE) vec16_t;
68 const __global vec16_t* pvsrc00 = (const __global vec16_t*)psrc00;
69 const __global vec16_t* pvsrc01 = (const __global vec16_t*)psrc01;
70 const __global vec16_t* pvsrc10 = (const __global vec16_t*)psrc10;
71 const __global vec16_t* pvsrc11 = (const __global vec16_t*)psrc11;
73 __global vec16_t* pvdst = (__global vec16_t*)pdst;
76 for (int n = 0; n < N; n++)
79 #if defined(INPUT0_FORMAT_YXFB) && defined(OUTPUT0_FORMAT_YXFB)
80 __attribute__((opencl_unroll_hint))
81 for (int vc = 0; c <= C - VEC_SIZE; c += VEC_SIZE, vc++)
83 int in_idx = (n*INPUT0_PITCHES[0] + vc*INPUT0_PITCHES[1]);
84 int out_idx = (n*OUTPUT0_PITCHES[0] + vc*OUTPUT0_PITCHES[1]);
85 pvdst[out_idx] = (vec16_t)(h_lambda1 * (w_lambda1 * pvsrc00[in_idx] +
86 w_lambda0 * pvsrc01[in_idx]) +
87 h_lambda0 * (w_lambda1 * pvsrc10[in_idx] +
88 w_lambda0 * pvsrc11[in_idx]));
91 __attribute__((opencl_unroll_hint))
94 int in_idx = n*INPUT0_PITCHES[0] + c*INPUT0_PITCHES[1];
95 int out_idx = n*OUTPUT0_PITCHES[0] + c*OUTPUT0_PITCHES[1];
96 pdst[out_idx] = (OUTPUT0_TYPE)(h_lambda1 * (w_lambda1 * psrc00[in_idx] + w_lambda0 * psrc01[in_idx]) +
97 h_lambda0 * (w_lambda1 * psrc10[in_idx] + w_lambda0 * psrc11[in_idx]));
102 __kernel void interp(const __global INPUT0_TYPE* input,
103 __global OUTPUT0_TYPE* output)
105 int IB = INPUT0_DIMS[0];
106 int IF = INPUT0_DIMS[1];
107 int IY = INPUT0_DIMS[2];
108 int IX = INPUT0_DIMS[3];
110 int OY = OUTPUT0_DIMS[2];
111 int OX = OUTPUT0_DIMS[3];
113 int IY_pad = IY + pad_beg_ + pad_end_;
114 int IX_pad = IX + pad_beg_ + pad_end_;
116 interpolate(IB, IF, input + INPUT0_OFFSET, -pad_beg_, -pad_beg_, IY_pad, IX_pad, IY, IX, output + OUTPUT0_OFFSET, 0, 0, OY, OX, OY, OX);