_hist.create(BINS, 1, ddepth);
UMat src = _src.getUMat(), ghist(1, BINS * compunits, CV_32SC1),
- hist = ddepth == CV_32S ? _hist.getUMat() : UMat(BINS, 1, CV_32SC1);
+ hist = _hist.getUMat();
k1.args(ocl::KernelArg::ReadOnly(src),
ocl::KernelArg::PtrWriteOnly(ghist), (int)src.total());
if (!k1.run(1, &globalsize, &wgs, false))
return false;
+ char cvt[40];
ocl::Kernel k2("merge_histogram", ocl::imgproc::histogram_oclsrc,
- format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d",
- BINS, compunits, (int)wgs));
+ format("-D BINS=%d -D HISTS_COUNT=%d -D WGS=%d -D convertToHT=%s -D HT=%s",
+ BINS, compunits, (int)wgs, ocl::convertTypeStr(CV_32S, ddepth, 1, cvt),
+ ocl::typeToStr(ddepth)));
if (k2.empty())
return false;
k2.args(ocl::KernelArg::PtrReadOnly(ghist),
- ocl::KernelArg::PtrWriteOnly(hist));
- if (!k2.run(1, &wgs, &wgs, false))
- return false;
-
- if (hist.depth() != ddepth)
- hist.convertTo(_hist, ddepth);
- else
- _hist.getUMatRef() = hist;
+ ocl::KernelArg::WriteOnlyNoSize(hist));
- return true;
+ return k2.run(1, &wgs, &wgs, false);
}
static bool ocl_calcHist(InputArrayOfArrays images, OutputArray hist)
#define T uchar
#endif
// Identity "conversion": the macro expands to nothing, so noconvert(x) is
// just the parenthesised expression (x).
+#define noconvert
+
__kernel void calculate_histogram(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * histptr, int total)
{
hist[i] = localhist[i];
}
-__kernel void merge_histogram(__global const int * ghist, __global int * hist)
// Element type of the merged output histogram; falls back to int when the
// build options do not supply -D HT=...
+#ifndef HT
+#define HT int
+#endif
+
// Conversion applied to an int partial count before it is accumulated into an
// HT bin; falls back to noconvert when -D convertToHT=... is not supplied.
+#ifndef convertToHT
+#define convertToHT noconvert
+#endif
+
// Merges partial histograms that are stored back to back in `ghist`
// (BINS ints each) into a single output histogram of element type HT.
//   ghist       - partial histograms, read as int
//   histptr     - output buffer, addressed in bytes
//   hist_step   - not used in the lines visible in this excerpt
//   hist_offset - byte offset of the first output element inside histptr
// BINS and WGS are compile-time macros. Each work-item handles bins
// lid, lid + WGS, lid + 2 * WGS, ...
+__kernel void merge_histogram(__global const int * ghist, __global uchar * histptr, int hist_step, int hist_offset)
{
// Index of this work-item inside its work-group (dimension 0).
int lid = get_local_id(0);
// Apply the byte offset first, then view the output as an array of HT.
+ __global HT * hist = (__global HT *)(histptr + hist_offset);
+
// Seed the output with the first partial histogram.
// NOTE(review): unlike the accumulation further down, this assignment does
// not go through convertToHT and relies on implicit int -> HT conversion;
// confirm that is intended for every HT the host can request.
#pragma unroll
for (int i = lid; i < BINS; i += WGS)
hist[i] = ghist[i];
// Step to the next partial histogram.
ghist += BINS;
// Add this partial histogram's counts into the output bins.
#pragma unroll
for (int j = lid; j < BINS; j += WGS)
- hist[j] += ghist[j];
+ hist[j] += convertToHT(ghist[j]);
// NOTE(review): the fence flag is CLK_LOCAL_MEM_FENCE while the writes
// visible here target __global memory; confirm the intended fence scope.
barrier(CLK_LOCAL_MEM_FENCE);
// NOTE(review): this brace closes a construct whose opening line is not
// visible in this excerpt (presumably a loop over the partial histograms).
}
}