+#if CV_NEON
+template<typename T> struct VSplit2;
+template<typename T> struct VSplit3;
+template<typename T> struct VSplit4;
+
+#define SPLIT2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
+ template<> \
+ struct name<data_type>{ \
+ void operator()(const data_type* src, data_type* dst0, data_type* dst1){ \
+ reg_type r = load_func(src); \
+ store_func(dst0, r.val[0]); \
+ store_func(dst1, r.val[1]); \
+ } \
+ }
+
+#define SPLIT3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
+ template<> \
+ struct name<data_type>{ \
+ void operator()(const data_type* src, data_type* dst0, data_type* dst1, \
+ data_type* dst2){ \
+ reg_type r = load_func(src); \
+ store_func(dst0, r.val[0]); \
+ store_func(dst1, r.val[1]); \
+ store_func(dst2, r.val[2]); \
+ } \
+ }
+
+#define SPLIT4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
+ template<> \
+ struct name<data_type>{ \
+ void operator()(const data_type* src, data_type* dst0, data_type* dst1, \
+ data_type* dst2, data_type* dst3){ \
+ reg_type r = load_func(src); \
+ store_func(dst0, r.val[0]); \
+ store_func(dst1, r.val[1]); \
+ store_func(dst2, r.val[2]); \
+ store_func(dst3, r.val[3]); \
+ } \
+ }
+
+SPLIT2_KERNEL_TEMPLATE(VSplit2, uchar , uint8x16x2_t, vld2q_u8 , vst1q_u8 );
+SPLIT2_KERNEL_TEMPLATE(VSplit2, schar , int8x16x2_t, vld2q_s8 , vst1q_s8 );
+SPLIT2_KERNEL_TEMPLATE(VSplit2, ushort, uint16x8x2_t, vld2q_u16, vst1q_u16);
+SPLIT2_KERNEL_TEMPLATE(VSplit2, short , int16x8x2_t, vld2q_s16, vst1q_s16);
+SPLIT2_KERNEL_TEMPLATE(VSplit2, int , int32x4x2_t, vld2q_s32, vst1q_s32);
+SPLIT2_KERNEL_TEMPLATE(VSplit2, float , float32x4x2_t, vld2q_f32, vst1q_f32);
+SPLIT2_KERNEL_TEMPLATE(VSplit2, int64 , int64x1x2_t, vld2_s64 , vst1_s64 );
+
+SPLIT3_KERNEL_TEMPLATE(VSplit3, uchar , uint8x16x3_t, vld3q_u8 , vst1q_u8 );
+SPLIT3_KERNEL_TEMPLATE(VSplit3, schar , int8x16x3_t, vld3q_s8 , vst1q_s8 );
+SPLIT3_KERNEL_TEMPLATE(VSplit3, ushort, uint16x8x3_t, vld3q_u16, vst1q_u16);
+SPLIT3_KERNEL_TEMPLATE(VSplit3, short , int16x8x3_t, vld3q_s16, vst1q_s16);
+SPLIT3_KERNEL_TEMPLATE(VSplit3, int , int32x4x3_t, vld3q_s32, vst1q_s32);
+SPLIT3_KERNEL_TEMPLATE(VSplit3, float , float32x4x3_t, vld3q_f32, vst1q_f32);
+SPLIT3_KERNEL_TEMPLATE(VSplit3, int64 , int64x1x3_t, vld3_s64 , vst1_s64 );
+
+SPLIT4_KERNEL_TEMPLATE(VSplit4, uchar , uint8x16x4_t, vld4q_u8 , vst1q_u8 );
+SPLIT4_KERNEL_TEMPLATE(VSplit4, schar , int8x16x4_t, vld4q_s8 , vst1q_s8 );
+SPLIT4_KERNEL_TEMPLATE(VSplit4, ushort, uint16x8x4_t, vld4q_u16, vst1q_u16);
+SPLIT4_KERNEL_TEMPLATE(VSplit4, short , int16x8x4_t, vld4q_s16, vst1q_s16);
+SPLIT4_KERNEL_TEMPLATE(VSplit4, int , int32x4x4_t, vld4q_s32, vst1q_s32);
+SPLIT4_KERNEL_TEMPLATE(VSplit4, float , float32x4x4_t, vld4q_f32, vst1q_f32);
+SPLIT4_KERNEL_TEMPLATE(VSplit4, int64 , int64x1x4_t, vld4_s64 , vst1_s64 );
+#endif
+