1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html
10 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// Converts `len` float16_t elements read from `src` into float elements written to `dst`.
12 void cvt16f32f(const float16_t* src, float* dst, int len);
// Converts `len` float elements read from `src` into float16_t elements written to `dst`.
13 void cvt32f16f(const float* src, float16_t* dst, int len);
// In-place update: arr[i] += scaleBiasPairs[i*2 + 1] for every i in [0, len).
14 void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len);
// Double-precision counterpart of addRNGBias32f (same indexing of scaleBiasPairs).
15 void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len);
17 CV_CPU_OPTIMIZATION_NAMESPACE_END
18 } // namespace cv::hal
20 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// Returns the conversion routine for a (source depth, destination depth) pair.
// Both arguments are reduced with CV_MAT_DEPTH before the table lookup in the definition.
22 BinaryFunc getConvertFunc(int sdepth, int ddepth);
24 CV_CPU_OPTIMIZATION_NAMESPACE_END
26 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
29 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// NOTE(review): this repeats the getConvertFunc prototype that is already declared earlier
// in this file; the enclosing namespace is not visible here - confirm it is still needed.
31 BinaryFunc getConvertFunc(int sdepth, int ddepth);
// Converts `len` float16_t values from `src` into float values in `dst`.
// NOTE(review): some lines of this function are absent from this listing (braces, the
// declaration of `j`, whatever guards the vector loop, and the scalar loop header);
// the comments below describe only the statements that are visible.
33 void cvt16f32f( const float16_t* src, float* dst, int len )
35 CV_INSTRUMENT_REGION();
// Vector path: each iteration advances by the lane count of v_float32.
38 const int VECSZ = v_float32::nlanes;
39 for( ; j < len; j += VECSZ )
// vx_load_expand presumably reads float16 values and widens them to float lanes,
// which are then stored to dst - TODO confirm against the intrinsics documentation.
47 v_store(dst + j, vx_load_expand(src + j));
// Scalar path: convert one element with a plain cast.
51 dst[j] = (float)src[j];
// Converts `len` float values from `src` into float16_t values in `dst`.
// NOTE(review): some lines of this function are absent from this listing (braces, the
// declaration of `j`, whatever guards the vector loop, and the scalar loop header);
// the comments below describe only the statements that are visible.
54 void cvt32f16f( const float* src, float16_t* dst, int len )
56 CV_INSTRUMENT_REGION();
// Vector path: each iteration advances by the lane count of v_float32.
59 const int VECSZ = v_float32::nlanes;
60 for( ; j < len; j += VECSZ )
// Loads one vector of floats; v_pack_store presumably narrows the lanes to float16
// while storing them to dst - TODO confirm against the intrinsics documentation.
68 v_pack_store(dst + j, vx_load(src + j));
// Scalar path: convert one element through the float16_t constructor.
72 dst[j] = float16_t(src[j]);
75 void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len )
77 CV_INSTRUMENT_REGION();
78 // the loop is simple enough, so we let the compiler to vectorize it
79 for( int i = 0; i < len; i++ )
80 arr[i] += scaleBiasPairs[i*2 + 1];
83 void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len )
85 CV_INSTRUMENT_REGION();
86 // the loop is simple enough, so we let the compiler to vectorize it
87 for( int i = 0; i < len; i++ )
88 arr[i] += scaleBiasPairs[i*2 + 1];
91 CV_CPU_OPTIMIZATION_NAMESPACE_END
92 } // namespace cv::hal
95 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// Generic row-by-row element conversion from _Ts to _Td.
//   src, sstep - source data and its row stride (rescaled to elements below)
//   dst, dstep - destination data and its row stride (rescaled to elements below)
//   size       - elements per row (width) and number of rows (height)
//   _Twvec     - vector type whose lane count (nlanes) determines the block size
// NOTE(review): this listing omits some lines of the function (braces, the declarations of
// j, v0 and v1, and the statement controlled by the `j == 0 || ...` test), so the comments
// below cover only what is visible.
97 template<typename _Ts, typename _Td, typename _Twvec> static inline void
98 cvt_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size )
// Divide the strides by the element size so they can be added to the typed pointers.
100 sstep /= sizeof(src[0]);
101 dstep /= sizeof(dst[0]);
// One iteration per row; both pointers advance by their stride.
103 for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
// Each vector iteration handles two _Twvec vectors' worth of elements (v0 and v1).
107 const int VECSZ = _Twvec::nlanes*2;
108 for( ; j < size.width; j += VECSZ )
// Fewer than VECSZ elements remain in this row.
110 if( j > size.width - VECSZ )
// Stepping back is not possible when no full block fits (j == 0), and it is not done for
// in-place conversion (src aliases dst). Presumably the elided statement leaves the
// vector loop so the scalar loop finishes the row - TODO confirm.
112 if( j == 0 || src == (_Ts*)dst )
// Otherwise move back so the last block ends exactly at the end of the row; this
// converts a few already-processed elements a second time.
114 j = size.width - VECSZ;
117 vx_load_pair_as(src + j, v0, v1);
118 v_store_pair_as(dst + j, v0, v1);
// Whatever the vector loop left over is converted element by element via saturate_cast.
121 for( ; j < size.width; j++ )
122 dst[j] = saturate_cast<_Td>(src[j]);
126 // in order to reduce the code size, for (16f <-> ...) conversions
127 // we add a conversion function without loop unrolling
//
// Row-by-row element conversion from _Ts to _Td that processes a single _Twvec vector
// per iteration (cvt_ processes two).
//   src, sstep - source data and its row stride (rescaled to elements below)
//   dst, dstep - destination data and its row stride (rescaled to elements below)
//   size       - elements per row (width) and number of rows (height)
// NOTE(review): this listing omits some lines of the function (braces, the declarations of
// j and v, and the statement controlled by the `j == 0 || ...` test), so the comments
// below cover only what is visible.
128 template<typename _Ts, typename _Td, typename _Twvec> static inline void
129 cvt1_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size )
// Divide the strides by the element size so they can be added to the typed pointers.
131 sstep /= sizeof(src[0]);
132 dstep /= sizeof(dst[0]);
// One iteration per row; both pointers advance by their stride.
134 for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
// Block size is exactly one vector.
138 const int VECSZ = _Twvec::nlanes;
139 for( ; j < size.width; j += VECSZ )
// Fewer than VECSZ elements remain in this row.
141 if( j > size.width - VECSZ )
// Stepping back is not possible when no full block fits (j == 0), and it is not done for
// in-place conversion (src aliases dst). Presumably the elided statement leaves the
// vector loop so the scalar loop finishes the row - TODO confirm.
143 if( j == 0 || src == (_Ts*)dst )
// Otherwise move back so the last block ends exactly at the end of the row.
145 j = size.width - VECSZ;
148 vx_load_as(src + j, v);
149 v_store_as(dst + j, v);
// Whatever the vector loop left over is converted element by element via saturate_cast.
153 for( ; j < size.width; j++ )
154 dst[j] = saturate_cast<_Td>(src[j]);
158 static void cvtCopy( const uchar* src, size_t sstep,
159 uchar* dst, size_t dstep, Size size, size_t elemsize)
161 size_t len = size.width*elemsize;
162 for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
164 memcpy( dst, src, len );
// Generates a static wrapper named cvt<suffix> with the untyped signature used by the
// entries of the conversion table. The wrapper reinterprets the byte pointers as
// _Ts (source) and _Td (destination) and forwards to cvtfunc<_Ts, _Td, _Twvec>.
// The second source pointer/step and the trailing void* argument are ignored.
#define DEF_CVT_FUNC(suffix, cvtfunc, _Ts, _Td, _Twvec) \
static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \
                        uchar* dst_, size_t dstep, Size size, void*) \
{ \
    CV_INSTRUMENT_REGION(); \
    cvtfunc<_Ts, _Td, _Twvec>(reinterpret_cast<const _Ts*>(src_), sstep, \
                              reinterpret_cast<_Td*>(dst_), dstep, size); \
}
178 ////////////////////// 8u -> ... ////////////////////////
// Wrappers for uchar sources. Each line expands through DEF_CVT_FUNC into a static
// cvt<suffix> function that forwards to the named template with the listed
// source type, destination type and vector type. 8u16f uses cvt1_, the rest use cvt_.
180 DEF_CVT_FUNC(8u8s, cvt_, uchar, schar, v_int16)
181 DEF_CVT_FUNC(8u16u, cvt_, uchar, ushort, v_uint16)
182 DEF_CVT_FUNC(8u16s, cvt_, uchar, short, v_int16)
183 DEF_CVT_FUNC(8u32s, cvt_, uchar, int, v_int32)
184 DEF_CVT_FUNC(8u32f, cvt_, uchar, float, v_float32)
185 DEF_CVT_FUNC(8u64f, cvt_, uchar, double, v_int32)
186 DEF_CVT_FUNC(8u16f, cvt1_, uchar, float16_t, v_float32)
188 ////////////////////// 8s -> ... ////////////////////////
// Wrappers for schar sources, generated through DEF_CVT_FUNC.
// 8s16f uses cvt1_, the rest use cvt_.
190 DEF_CVT_FUNC(8s8u, cvt_, schar, uchar, v_int16)
191 DEF_CVT_FUNC(8s16u, cvt_, schar, ushort, v_uint16)
192 DEF_CVT_FUNC(8s16s, cvt_, schar, short, v_int16)
193 DEF_CVT_FUNC(8s32s, cvt_, schar, int, v_int32)
194 DEF_CVT_FUNC(8s32f, cvt_, schar, float, v_float32)
195 DEF_CVT_FUNC(8s64f, cvt_, schar, double, v_int32)
196 DEF_CVT_FUNC(8s16f, cvt1_, schar, float16_t, v_float32)
198 ////////////////////// 16u -> ... ////////////////////////
// Wrappers for ushort sources, generated through DEF_CVT_FUNC.
// 16u16f uses cvt1_, the rest use cvt_.
200 DEF_CVT_FUNC(16u8u, cvt_, ushort, uchar, v_uint16)
201 DEF_CVT_FUNC(16u8s, cvt_, ushort, schar, v_uint16)
202 DEF_CVT_FUNC(16u16s, cvt_, ushort, short, v_int32)
203 DEF_CVT_FUNC(16u32s, cvt_, ushort, int, v_int32)
204 DEF_CVT_FUNC(16u32f, cvt_, ushort, float, v_float32)
205 DEF_CVT_FUNC(16u64f, cvt_, ushort, double, v_int32)
206 DEF_CVT_FUNC(16u16f, cvt1_,ushort, float16_t, v_float32)
208 ////////////////////// 16s -> ... ////////////////////////
// Wrappers for short sources, generated through DEF_CVT_FUNC.
// 16s16f uses cvt1_, the rest use cvt_.
210 DEF_CVT_FUNC(16s8u, cvt_, short, uchar, v_int16)
211 DEF_CVT_FUNC(16s8s, cvt_, short, schar, v_int16)
212 DEF_CVT_FUNC(16s16u, cvt_, short, ushort, v_int32)
213 DEF_CVT_FUNC(16s32s, cvt_, short, int, v_int32)
214 DEF_CVT_FUNC(16s32f, cvt_, short, float, v_float32)
215 DEF_CVT_FUNC(16s64f, cvt_, short, double, v_int32)
216 DEF_CVT_FUNC(16s16f, cvt1_,short, float16_t, v_float32)
218 ////////////////////// 32s -> ... ////////////////////////
// Wrappers for int sources, generated through DEF_CVT_FUNC.
// 32s16f uses cvt1_, the rest use cvt_.
220 DEF_CVT_FUNC(32s8u, cvt_, int, uchar, v_int32)
221 DEF_CVT_FUNC(32s8s, cvt_, int, schar, v_int32)
222 DEF_CVT_FUNC(32s16u, cvt_, int, ushort, v_int32)
223 DEF_CVT_FUNC(32s16s, cvt_, int, short, v_int32)
224 DEF_CVT_FUNC(32s32f, cvt_, int, float, v_float32)
225 DEF_CVT_FUNC(32s64f, cvt_, int, double, v_int32)
226 DEF_CVT_FUNC(32s16f, cvt1_,int, float16_t, v_float32)
228 ////////////////////// 32f -> ... ////////////////////////
// Wrappers for float sources, generated through DEF_CVT_FUNC.
// 32f16f uses cvt1_, the rest use cvt_.
230 DEF_CVT_FUNC(32f8u, cvt_, float, uchar, v_float32)
231 DEF_CVT_FUNC(32f8s, cvt_, float, schar, v_float32)
232 DEF_CVT_FUNC(32f16u, cvt_, float, ushort, v_float32)
233 DEF_CVT_FUNC(32f16s, cvt_, float, short, v_float32)
234 DEF_CVT_FUNC(32f32s, cvt_, float, int, v_float32)
235 DEF_CVT_FUNC(32f64f, cvt_, float, double, v_float32)
236 DEF_CVT_FUNC(32f16f, cvt1_,float, float16_t, v_float32)
238 ////////////////////// 64f -> ... ////////////////////////
// Wrappers for double sources, generated through DEF_CVT_FUNC.
// 64f16f uses cvt1_, the rest use cvt_.
240 DEF_CVT_FUNC(64f8u, cvt_, double, uchar, v_int32)
241 DEF_CVT_FUNC(64f8s, cvt_, double, schar, v_int32)
242 DEF_CVT_FUNC(64f16u, cvt_, double, ushort, v_int32)
243 DEF_CVT_FUNC(64f16s, cvt_, double, short, v_int32)
244 DEF_CVT_FUNC(64f32s, cvt_, double, int, v_int32)
245 DEF_CVT_FUNC(64f32f, cvt_, double, float, v_float32)
246 DEF_CVT_FUNC(64f16f, cvt1_,double, float16_t, v_float32)
248 ////////////////////// 16f -> ... ////////////////////////
// Wrappers for float16_t sources, generated through DEF_CVT_FUNC.
// NOTE(review): 16f8u and 16f8s go through cvt_ (two vectors per iteration) while the
// other 16f sources use cvt1_; the comment on cvt1_ says 16f conversions use the
// non-unrolled helper - confirm the cvt_ choice for these two is intentional.
250 DEF_CVT_FUNC(16f8u, cvt_, float16_t, uchar, v_float32)
251 DEF_CVT_FUNC(16f8s, cvt_, float16_t, schar, v_float32)
252 DEF_CVT_FUNC(16f16u, cvt1_, float16_t, ushort, v_float32)
253 DEF_CVT_FUNC(16f16s, cvt1_, float16_t, short, v_float32)
254 DEF_CVT_FUNC(16f32s, cvt1_, float16_t, int, v_float32)
255 DEF_CVT_FUNC(16f32f, cvt1_, float16_t, float, v_float32)
256 DEF_CVT_FUNC(16f64f, cvt1_, float16_t, double, v_float32)
258 ///////////// "conversion" w/o conversion ///////////////
260 static void cvt8u(const uchar* src, size_t sstep, const uchar*, size_t, uchar* dst, size_t dstep, Size size, void*)
261 { CV_INSTRUMENT_REGION(); cvtCopy(src, sstep, dst, dstep, size, 1); }
263 static void cvt16u(const uchar* src, size_t sstep, const uchar*, size_t, uchar* dst, size_t dstep, Size size, void*)
264 { CV_INSTRUMENT_REGION(); cvtCopy((const uchar*)src, sstep, (uchar*)dst, dstep, size, 2); }
266 static void cvt32s(const uchar* src, size_t sstep, const uchar*, size_t, uchar* dst, size_t dstep, Size size, void*)
267 { CV_INSTRUMENT_REGION(); cvtCopy((const uchar*)src, sstep, (uchar*)dst, dstep, size, 4); }
269 static void cvt64s(const uchar* src, size_t sstep, const uchar*, size_t, uchar* dst, size_t dstep, Size size, void*)
270 { CV_INSTRUMENT_REGION(); cvtCopy((const uchar*)src, sstep, (uchar*)dst, dstep, size, 8); }
273 /* [TODO] Recover IPP calls
274 #if defined(HAVE_IPP)
275 #define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
276 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
277 dtype* dst, size_t dstep, Size size, double*) \
279 CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \
280 cvt_(src, sstep, dst, dstep, size); \
283 #define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \
284 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
285 dtype* dst, size_t dstep, Size size, double*) \
287 CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \
288 cvt_(src, sstep, dst, dstep, size); \
291 #define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
292 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
293 dtype* dst, size_t dstep, Size size, double*) \
295 cvt_(src, sstep, dst, dstep, size); \
297 #define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F
300 #define DEF_CVT_FUNC(suffix, stype, dtype) \
301 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
302 dtype* dst, size_t dstep, Size size, double*) \
304 cvt_(src, sstep, dst, dstep, size); \
307 #define DEF_CPY_FUNC(suffix, stype) \
308 static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
309 stype* dst, size_t dstep, Size size, double*) \
311 cpy_(src, sstep, dst, dstep, size); \
314 DEF_CPY_FUNC(8u, uchar)
315 DEF_CVT_FUNC_F(8s8u, schar, uchar, 8s8u_C1Rs)
316 DEF_CVT_FUNC_F(16u8u, ushort, uchar, 16u8u_C1R)
317 DEF_CVT_FUNC_F(16s8u, short, uchar, 16s8u_C1R)
318 DEF_CVT_FUNC_F(32s8u, int, uchar, 32s8u_C1R)
319 DEF_CVT_FUNC_F2(32f8u, float, uchar, 32f8u_C1RSfs)
320 DEF_CVT_FUNC(64f8u, double, uchar)
322 DEF_CVT_FUNC_F2(8u8s, uchar, schar, 8u8s_C1RSfs)
323 DEF_CVT_FUNC_F2(16u8s, ushort, schar, 16u8s_C1RSfs)
324 DEF_CVT_FUNC_F2(16s8s, short, schar, 16s8s_C1RSfs)
325 DEF_CVT_FUNC_F(32s8s, int, schar, 32s8s_C1R)
326 DEF_CVT_FUNC_F2(32f8s, float, schar, 32f8s_C1RSfs)
327 DEF_CVT_FUNC(64f8s, double, schar)
329 DEF_CVT_FUNC_F(8u16u, uchar, ushort, 8u16u_C1R)
330 DEF_CVT_FUNC_F(8s16u, schar, ushort, 8s16u_C1Rs)
331 DEF_CPY_FUNC(16u, ushort)
332 DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs)
333 DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs)
334 DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs)
335 DEF_CVT_FUNC(64f16u, double, ushort)
337 DEF_CVT_FUNC_F(8u16s, uchar, short, 8u16s_C1R)
338 DEF_CVT_FUNC_F(8s16s, schar, short, 8s16s_C1R)
339 DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs)
340 DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs)
341 DEF_CVT_FUNC(32f16s, float, short)
342 DEF_CVT_FUNC(64f16s, double, short)
344 DEF_CVT_FUNC_F(8u32s, uchar, int, 8u32s_C1R)
345 DEF_CVT_FUNC_F(8s32s, schar, int, 8s32s_C1R)
346 DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R)
347 DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R)
348 DEF_CPY_FUNC(32s, int)
349 DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs)
350 DEF_CVT_FUNC(64f32s, double, int)
352 DEF_CVT_FUNC_F(8u32f, uchar, float, 8u32f_C1R)
353 DEF_CVT_FUNC_F(8s32f, schar, float, 8s32f_C1R)
354 DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R)
355 DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R)
356 DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R)
357 DEF_CVT_FUNC(64f32f, double, float)
359 DEF_CVT_FUNC(8u64f, uchar, double)
360 DEF_CVT_FUNC(8s64f, schar, double)
361 DEF_CVT_FUNC(16u64f, ushort, double)
362 DEF_CVT_FUNC(16s64f, short, double)
363 DEF_CVT_FUNC(32s64f, int, double)
364 DEF_CVT_FUNC(32f64f, float, double)
365 DEF_CPY_FUNC(64s, int64)
368 BinaryFunc getConvertFunc(int sdepth, int ddepth)
370 static BinaryFunc cvtTab[][8] =
373 (cvt8u), (cvt8s8u), (cvt16u8u),
374 (cvt16s8u), (cvt32s8u), (cvt32f8u),
375 (cvt64f8u), (cvt16f8u)
378 (cvt8u8s), cvt8u, (cvt16u8s),
379 (cvt16s8s), (cvt32s8s), (cvt32f8s),
380 (cvt64f8s), (cvt16f8s)
383 (cvt8u16u), (cvt8s16u), cvt16u,
384 (cvt16s16u), (cvt32s16u), (cvt32f16u),
385 (cvt64f16u), (cvt16f16u)
388 (cvt8u16s), (cvt8s16s), (cvt16u16s),
389 cvt16u, (cvt32s16s), (cvt32f16s),
390 (cvt64f16s), (cvt16f16s)
393 (cvt8u32s), (cvt8s32s), (cvt16u32s),
394 (cvt16s32s), cvt32s, (cvt32f32s),
395 (cvt64f32s), (cvt16f32s)
398 (cvt8u32f), (cvt8s32f), (cvt16u32f),
399 (cvt16s32f), (cvt32s32f), cvt32s,
400 (cvt64f32f), (cvt16f32f)
403 (cvt8u64f), (cvt8s64f), (cvt16u64f),
404 (cvt16s64f), (cvt32s64f), (cvt32f64f),
405 (cvt64s), (cvt16f64f)
408 (cvt8u16f), (cvt8s16f), (cvt16u16f), (cvt16s16f),
409 (cvt32s16f), (cvt32f16f), (cvt64f16f), (cvt16u)
412 return cvtTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
415 CV_CPU_OPTIMIZATION_NAMESPACE_END