From cf5e272878a75c9819d0fee89bd11ce4fcf3f43c Mon Sep 17 00:00:00 2001 From: Daniil Osokin Date: Sun, 24 Feb 2013 23:17:40 +0400 Subject: [PATCH] Added multithreaded implementation for RGB to YUV420p color conversion --- modules/imgproc/src/color.cpp | 114 +++++++++++++++++++++++++++--------------- 1 file changed, 75 insertions(+), 39 deletions(-) diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 82c2195..3799d43 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -2744,6 +2744,16 @@ const int ITUR_BT_601_CVG = -852492; const int ITUR_BT_601_CVR = 1673527; const int ITUR_BT_601_SHIFT = 20; +// Coefficients for RGB to YUV420p conversion +const int ITUR_BT_601_CRY = 269484; +const int ITUR_BT_601_CGY = 528482; +const int ITUR_BT_601_CBY = 102760; +const int ITUR_BT_601_CRU = -155188; +const int ITUR_BT_601_CGU = -305135; +const int ITUR_BT_601_CBU = 460324; +const int ITUR_BT_601_CGV = -385875; +const int ITUR_BT_601_CBV = -74448; + template struct YUV420sp2RGB888Invoker { @@ -3078,54 +3088,80 @@ inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const ucha ///////////////////////////////////// RGB -> YUV420p ///////////////////////////////////// -template -static void cvtRGBtoYUV420p(const Mat& src, Mat& dst) +template +struct RGB888toYUV420pInvoker: public ParallelLoopBody { - //const float coeffs[] = { 0.257f, 0.504f, 0.098f, - // -0.148f, -0.291f, 0.439f, - // -0.368f, -0.071f }; - const int coeffs[] = { 269484, 528482, 102760, - -155188, -305135, 460324, - -385875, -74448 }; - - const int w = src.cols; - const int h = src.rows; + RGB888toYUV420pInvoker( const Mat& src, Mat* dst, const int uIdx ) + : src_(src), + dst_(dst), + uIdx_(uIdx) { } - const int cn = src.channels(); - for( int i = 0; i < h / 2; i++ ) + void operator()(const Range& rowRange) const { - const uchar* row0 = src.ptr(2*i); - const uchar* row1 = src.ptr(2*i + 1); - - uchar* y = dst.ptr(2*i); - uchar* u = dst.ptr(h + i/2) + (i % 2) * (w/2); - uchar* v = dst.ptr(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2); - if( uIdx == 2 ) std::swap(u, v); + const int w = src_.cols; + const int h = src_.rows; - for( int j = 0, k = 0; j < w * cn; j += 2*cn, k++ ) + const int cn = src_.channels(); + for( int i = rowRange.start; i < rowRange.end; i++ ) { - int r00 = row0[2-bIdx + j]; int g00 = row0[1 + j]; int b00 = row0[bIdx + j]; - int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j]; - int r10 = row1[2-bIdx + j]; int g10 = row1[1 + j]; int b10 = row1[bIdx + j]; - int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j]; - - int y00 = coeffs[0]*r00 + coeffs[1]*g00 + coeffs[2]*b00 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT); - int y01 = coeffs[0]*r01 + coeffs[1]*g01 + coeffs[2]*b01 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT); - int y10 = coeffs[0]*r10 + coeffs[1]*g10 + coeffs[2]*b10 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT); - int y11 = coeffs[0]*r11 + coeffs[1]*g11 + coeffs[2]*b11 + (1 << (ITUR_BT_601_SHIFT - 1)) + (16 << ITUR_BT_601_SHIFT); - - y[2*k + 0] = saturate_cast(y00 >> ITUR_BT_601_SHIFT); - y[2*k + 1] = saturate_cast(y01 >> ITUR_BT_601_SHIFT); - y[2*k + dst.step + 0] = saturate_cast(y10 >> ITUR_BT_601_SHIFT); - y[2*k + dst.step + 1] = saturate_cast(y11 >> ITUR_BT_601_SHIFT); + const uchar* row0 = src_.ptr(2 * i); + const uchar* row1 = src_.ptr(2 * i + 1); - int u00 = coeffs[3]*r00 + coeffs[4]*g00 + coeffs[5]*b00 + (1 << (ITUR_BT_601_SHIFT - 1)) + (128 << ITUR_BT_601_SHIFT); - int v00 = coeffs[5]*r00 + coeffs[6]*g00 + coeffs[7]*b00 + (1 << (ITUR_BT_601_SHIFT - 1)) + (128 << ITUR_BT_601_SHIFT); + uchar* y = dst_->ptr(2*i); + uchar* u = dst_->ptr(h + i/2) + (i % 2) * (w/2); + uchar* v = dst_->ptr(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2); + if( uIdx_ == 2 ) std::swap(u, v); - u[k] = saturate_cast(u00 >> ITUR_BT_601_SHIFT); - v[k] = saturate_cast(v00 >> ITUR_BT_601_SHIFT); + for( int j = 0, k = 0; j < w * cn; j += 2 * cn, k++ ) + { + int r00 = row0[2-bIdx + j]; int g00 = row0[1 + j]; int b00 = row0[bIdx + j]; + int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j]; + int r10 = row1[2-bIdx + j]; int g10 = row1[1 + j]; int b10 = row1[bIdx + j]; + int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j]; + + const int shifted16 = (16 << ITUR_BT_601_SHIFT); + const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1)); + int y00 = ITUR_BT_601_CRY * r00 + ITUR_BT_601_CGY * g00 + ITUR_BT_601_CBY * b00 + halfShift + shifted16; + int y01 = ITUR_BT_601_CRY * r01 + ITUR_BT_601_CGY * g01 + ITUR_BT_601_CBY * b01 + halfShift + shifted16; + int y10 = ITUR_BT_601_CRY * r10 + ITUR_BT_601_CGY * g10 + ITUR_BT_601_CBY * b10 + halfShift + shifted16; + int y11 = ITUR_BT_601_CRY * r11 + ITUR_BT_601_CGY * g11 + ITUR_BT_601_CBY * b11 + halfShift + shifted16; + + y[2*k + 0] = saturate_cast(y00 >> ITUR_BT_601_SHIFT); + y[2*k + 1] = saturate_cast(y01 >> ITUR_BT_601_SHIFT); + y[2*k + dst_->step + 0] = saturate_cast(y10 >> ITUR_BT_601_SHIFT); + y[2*k + dst_->step + 1] = saturate_cast(y11 >> ITUR_BT_601_SHIFT); + + const int shifted128 = (128 << ITUR_BT_601_SHIFT); + int u00 = ITUR_BT_601_CRU * r00 + ITUR_BT_601_CGU * g00 + ITUR_BT_601_CBU * b00 + halfShift + shifted128; + int v00 = ITUR_BT_601_CBU * r00 + ITUR_BT_601_CGV * g00 + ITUR_BT_601_CBV * b00 + halfShift + shifted128; + + u[k] = saturate_cast(u00 >> ITUR_BT_601_SHIFT); + v[k] = saturate_cast(v00 >> ITUR_BT_601_SHIFT); + } } } + + static bool isFit( const Mat& src ) + { + return (src.total() >= 320*240); + } + +private: + RGB888toYUV420pInvoker& operator=(const RGB888toYUV420pInvoker&); + + const Mat& src_; + Mat* const dst_; + const int uIdx_; +}; + +template +static void cvtRGBtoYUV420p(const Mat& src, Mat& dst) +{ + RGB888toYUV420pInvoker colorConverter(src, &dst, uIdx); + if( RGB888toYUV420pInvoker::isFit(src) ) + parallel_for_(Range(0, src.rows/2), colorConverter); + else + colorConverter(Range(0, src.rows/2)); } ///////////////////////////////////// YUV422 -> RGB ///////////////////////////////////// -- 2.7.4