- Context *clCxt = mat_src.clCxt;
- int channels = mat_src.oclchannels();
- int depth = mat_src.depth();
-
- string kernelName = "split_vector";
-
- int vector_lengths[4][7] = {{0, 0, 0, 0, 0, 0, 0},
- {4, 4, 2, 2, 1, 1, 1},
- {4, 4, 2, 2 , 1, 1, 1},
- {4, 4, 2, 2, 1, 1, 1}
- };
-
- size_t vector_length = vector_lengths[channels - 1][mat_dst[0].depth()];
-
- int max_offset_cols = 0;
- for(int i = 0; i < channels; i++)
+ Context *clCtx = src.clCxt;
+ int channels = src.channels();
+ int depth = src.depth();
+ depth = (depth == CV_8S) ? CV_8U : depth;
+ depth = (depth == CV_16S) ? CV_16U : depth;
+
+ String kernelName = "split_vector";
+
+ size_t VEC_SIZE = 4;
+
+ std::vector<std::pair<size_t , const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step));
+ int srcOffsetXBytes = src.offset % src.step;
+ int srcOffsetY = src.offset / src.step;
+ cl_int2 srcOffset = {{srcOffsetXBytes, srcOffsetY}};
+ args.push_back( std::make_pair( sizeof(cl_int2), (void *)&srcOffset));
+
+ bool dst0Aligned = false, dst1Aligned = false, dst2Aligned = false, dst3Aligned = false;
+ int alignSize = dst[0].elemSize1() * VEC_SIZE;
+ int alignMask = alignSize - 1;
+
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst[0].data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst[0].step));
+ int dst0OffsetXBytes = dst[0].offset % dst[0].step;
+ int dst0OffsetY = dst[0].offset / dst[0].step;
+ cl_int2 dst0Offset = {{dst0OffsetXBytes, dst0OffsetY}};
+ args.push_back( std::make_pair( sizeof(cl_int2), (void *)&dst0Offset));
+ if ((dst0OffsetXBytes & alignMask) == 0)
+ dst0Aligned = true;
+
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst[1].data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst[1].step));
+ int dst1OffsetXBytes = dst[1].offset % dst[1].step;
+ int dst1OffsetY = dst[1].offset / dst[1].step;
+ cl_int2 dst1Offset = {{dst1OffsetXBytes, dst1OffsetY}};
+ args.push_back( std::make_pair( sizeof(cl_int2), (void *)&dst1Offset));
+ if ((dst1OffsetXBytes & alignMask) == 0)
+ dst1Aligned = true;
+
+ // DON'T MOVE VARIABLES INTO 'IF' BODY
+ int dst2OffsetXBytes, dst2OffsetY;
+ cl_int2 dst2Offset;
+ int dst3OffsetXBytes, dst3OffsetY;
+ cl_int2 dst3Offset;
+ if (channels >= 3)