template <int nthreads>
- __global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
- const float* block_hists, PtrStepf descriptors)
+ __global__ void extract_descrs_by_rows_kernel(const int img_block_width,
+ const int win_block_stride_x, const int win_block_stride_y,
+ const float* block_hists,
+ PtrStepf descriptors)
{
// Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
}
- void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
- int height, int width, float* block_hists, int cell_size_x, int ncells_block_x, PtrStepSzf descriptors)
+ void extract_descrs_by_rows(int win_height, int win_width,
+ int block_stride_y, int block_stride_x,
+ int win_stride_y, int win_stride_x,
+ int height, int width,
+ float* block_hists, int cell_size_x,
+ int ncells_block_x,
+ PtrStepSzf descriptors,
+ const cudaStream_t& stream)
{
const int nthreads = 256;
dim3 grid(img_win_width, img_win_height);
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
- extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
- img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
- cudaSafeCall( cudaGetLastError() );
+ extract_descrs_by_rows_kernel<nthreads><<<grid, threads, 0, stream>>>(img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
- cudaSafeCall( cudaDeviceSynchronize() );
+ cudaSafeCall( cudaGetLastError() );
}
template <int nthreads>
- __global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
- const int win_block_stride_y, const float* block_hists,
+ __global__ void extract_descrs_by_cols_kernel(const int img_block_width,
+ const int win_block_stride_x, const int win_block_stride_y,
+ const float* block_hists,
PtrStepf descriptors)
{
// Get left top corner of the window in src
}
- void compute_gradients_8UC1(int nbins, int height, int width, const PtrStepSzb& img,
- float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
+ void compute_gradients_8UC1(int nbins,
+ int height, int width, const PtrStepSzb& img,
+ float angle_scale,
+ PtrStepSzf grad, PtrStepSzb qangle,
+ bool correct_gamma,
+ const cudaStream_t& stream)
{
(void)nbins;
const int nthreads = 256;
dim3 gdim(divUp(width, bdim.x), divUp(height, bdim.y));
if (correct_gamma)
- compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
+ compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim, 0, stream>>>(height, width, img, angle_scale, grad, qangle);
else
- compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
+ compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim, 0, stream>>>(height, width, img, angle_scale, grad, qangle);
cudaSafeCall( cudaGetLastError() );
-
- cudaSafeCall( cudaDeviceSynchronize() );
}
{
namespace hog
{
- void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
+ void set_up_constants(int nbins,
+ int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y,
int ncells_block_x, int ncells_block_y,
const cudaStream_t& stream);
- void compute_hists(int nbins, int block_stride_x, int block_stride_y,
- int height, int width, const PtrStepSzf& grad,
- const PtrStepSzb& qangle, float sigma, float* block_hists,
- int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y,
+ void compute_hists(int nbins,
+ int block_stride_x, int block_stride_y,
+ int height, int width,
+ const PtrStepSzf& grad, const PtrStepSzb& qangle,
+ float sigma,
+ float* block_hists,
+ int cell_size_x, int cell_size_y,
+ int ncells_block_x, int ncells_block_y,
const cudaStream_t& stream);
- void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
- int height, int width, float* block_hists, float threshold, int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y,
+ void normalize_hists(int nbins,
+ int block_stride_x, int block_stride_y,
+ int height, int width,
+ float* block_hists,
+ float threshold,
+ int cell_size_x, int cell_size_y,
+ int ncells_block_x, int ncells_block_y,
const cudaStream_t& stream);
void classify_hists(int win_height, int win_width, int block_stride_y,
float threshold, int cell_size_x, int ncells_block_x, unsigned char* labels);
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
- int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
- float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, float *confidences);
-
- void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
- int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x,
- cv::cuda::PtrStepSzf descriptors);
- void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
- int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x,
+ int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
+ float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, float *confidences);
+
+ void extract_descrs_by_rows(int win_height, int win_width,
+ int block_stride_y, int block_stride_x,
+ int win_stride_y, int win_stride_x,
+ int height, int width,
+ float* block_hists,
+ int cell_size_x, int ncells_block_x,
+ cv::cuda::PtrStepSzf descriptors,
+ const cudaStream_t& stream);
+ void extract_descrs_by_cols(int win_height, int win_width,
+ int block_stride_y, int block_stride_x,
+ int win_stride_y, int win_stride_x,
+ int height, int width,
+ float* block_hists,
+ int cell_size_x, int ncells_block_x,
cv::cuda::PtrStepSzf descriptors,
const cudaStream_t& stream);
- void compute_gradients_8UC1(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img,
- float angle_scale, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle, bool correct_gamma);
- void compute_gradients_8UC4(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img,
- float angle_scale, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle, bool correct_gamma,
+ void compute_gradients_8UC1(int nbins,
+ int height, int width, const cv::cuda::PtrStepSzb& img,
+ float angle_scale,
+ cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle,
+ bool correct_gamma,
+ const cudaStream_t& stream);
+ void compute_gradients_8UC4(int nbins,
+ int height, int width, const cv::cuda::PtrStepSzb& img,
+ float angle_scale,
+ cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle,
+ bool correct_gamma,
const cudaStream_t& stream);
void resize_8UC1(const cv::cuda::PtrStepSzb& src, cv::cuda::PtrStepSzb dst);
img.rows, img.cols,
block_hists.ptr<float>(),
cell_size_.width, cells_per_block_.width,
- descriptors);
+ descriptors,
+ StreamAccessor::getStream(stream));
break;
case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size_.height, win_size_.width,
switch (img.type())
{
case CV_8UC1:
- hog::compute_gradients_8UC1(nbins_, img.rows, img.cols, img,
- angleScale, grad, qangle, gamma_correction_);
+ hog::compute_gradients_8UC1(nbins_,
+ img.rows, img.cols, img,
+ angleScale,
+ grad, qangle,
+ gamma_correction_,
+ StreamAccessor::getStream(stream));
break;
case CV_8UC4:
hog::compute_gradients_8UC4(nbins_,