Retina module is now parallelized thanks to the TBB library. Speed increase expected...
authornoob <noob@noob.(none)>
Wed, 29 Aug 2012 15:44:21 +0000 (17:44 +0200)
committernoob <noob@noob.(none)>
Wed, 29 Aug 2012 15:44:21 +0000 (17:44 +0200)
modules/contrib/src/basicretinafilter.cpp
modules/contrib/src/basicretinafilter.hpp
modules/contrib/src/magnoretinafilter.cpp
modules/contrib/src/magnoretinafilter.hpp
modules/contrib/src/parvoretinafilter.cpp
modules/contrib/src/parvoretinafilter.hpp
modules/contrib/src/retinacolor.cpp
modules/contrib/src/retinacolor.hpp
modules/contrib/src/templatebuffer.hpp

index 06320f8..a4270aa 100644 (file)
@@ -316,28 +316,50 @@ void BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const std::valarray
        _spatiotemporalLPfilter(get_data(inputFrame), &_filterOutput[0]);
        _localLuminanceAdaptation(get_data(inputFrame), &_filterOutput[0], &outputFrame[0]);
 }
-// local luminance adaptation of the input in regard of localLuminance buffer
-void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame)
+
+// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
+void BasicRetinaFilter::_localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance)
 {
-       float meanLuminance=0;
-       const float *luminancePTR=inputFrame;
-       for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
-               meanLuminance+=*(luminancePTR++);
-       meanLuminance/=_filterOutput.getNBpixels();
-       //float tempMeanValue=meanLuminance+_meanInputValue*_tau;
+    _localLuminanceAdaptation(inputOutputFrame, localLuminance, inputOutputFrame, false);
+       
+    /*    const float *localLuminancePTR=localLuminance;
+       float *inputOutputFramePTR=inputOutputFrame;
+
+       for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputOutputFramePTR)
+       {
+               float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
+               *(inputOutputFramePTR) = (_maxInputValue+X0)**inputOutputFramePTR/(*inputOutputFramePTR +X0+0.00000000001);
+       }
+      */  
+}
 
-       updateCompressionParameter(meanLuminance);
+// local luminance adaptation of the input in regard of localLuminance buffer
+void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame, const bool updateLuminanceMean)
+{
+       if (updateLuminanceMean)
+       {       float meanLuminance=0;
+               const float *luminancePTR=inputFrame;
+               for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
+                       meanLuminance+=*(luminancePTR++);
+               meanLuminance/=_filterOutput.getNBpixels();
+               //float tempMeanValue=meanLuminance+_meanInputValue*_tau;
+               updateCompressionParameter(meanLuminance);
+       }
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_localAdaptation(localLuminance, inputFrame, outputFrame, _localLuminanceFactor, _localLuminanceAddon, _maxInputValue), tbb::auto_partitioner());
+#else
        //std::cout<<meanLuminance<<std::endl;
        const float *localLuminancePTR=localLuminance;
        const float *inputFramePTR=inputFrame;
        float *outputFramePTR=outputFrame;
-       for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputFramePTR)
+       for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR)
        {
                float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
                // TODO : the following line can lead to a divide by zero ! A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values...               
-               *(outputFramePTR++) = (_maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001f);
+               *(outputFramePTR) = (_maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001);
                //std::cout<<"BasicRetinaFilter::inputFrame[IDpixel]=%f, X0=%f, outputFrame[IDpixel]=%f\n", inputFrame[IDpixel], X0, outputFrame[IDpixel]);
        }
+#endif
 }
 
 // local adaptation applied on a range of values which can be positive and negative
@@ -355,27 +377,6 @@ void BasicRetinaFilter::_localLuminanceAdaptationPosNegValues(const float *input
        }
 }
 
-// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
-void BasicRetinaFilter::_localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance)
-{
-       /*float meanLuminance=0;
-    const float *luminancePTR=inputOutputFrame;
-    for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
-      meanLuminance+=*(luminancePTR++);
-    meanLuminance/=_filterOutput.getNBpixels();
-    //float tempMeanValue=meanLuminance+_meanInputValue*_tau;
-
-    updateCompressionParameter(meanLuminance);
-        */
-       const float *localLuminancePTR=localLuminance;
-       float *inputOutputFramePTR=inputOutputFrame;
-
-       for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputOutputFramePTR)
-       {
-               float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
-               *(inputOutputFramePTR) = (_maxInputValue+X0)**inputOutputFramePTR/(*inputOutputFramePTR +X0);
-       }
-}
 ///////////////////////////////////////////////////////////////////////
 /// Spatio temporal Low Pass filter functions
 // run LP filter and save result in the basic retina element buffer
@@ -465,7 +466,9 @@ void BasicRetinaFilter::_horizontalCausalFilter(float *outputFrame, unsigned int
 //  horizontal causal filter which adds the input inside
 void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
 {
-       //#pragma omp parallel for
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalCausalFilter_addInput(inputFrame, outputFrame, IDrowStart, _filterOutput.getNBcolumns(), _a, _tau), tbb::auto_partitioner());
+#else
        for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
        {
                register float* outputPTR=outputFrame+(IDrowStart+IDrow)*_filterOutput.getNBcolumns();
@@ -477,14 +480,16 @@ void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame
                        *(outputPTR++) = result;
                }
        }
-
+#endif
 }
 
 //  horizontal anticausal filter  (basic way, no add on)
 void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
 {
 
-       //#pragma omp parallel for
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter(outputFrame, IDrowEnd, _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner());
+#else
        for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
        {
                register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(_filterOutput.getNBcolumns())-1;
@@ -495,9 +500,9 @@ void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned
                        *(outputPTR--) = result;
                }
        }
-
-
+#endif
 }
+
 //  horizontal anticausal filter which multiplies the output by _gain
 void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
 {
@@ -518,8 +523,10 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame,
 //  vertical anticausal filter
 void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
 {
-       //#pragma omp parallel for
-       for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner());
+#else
+        for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
        {
                register float result=0;
                register float *outputPTR=outputFrame+IDcolumn;
@@ -532,6 +539,7 @@ void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int I
 
                }
        }
+#endif
 }
 
 
@@ -558,7 +566,10 @@ void BasicRetinaFilter::_verticalAnticausalFilter(float *outputFrame, unsigned i
 //  vertical anticausal filter which multiplies the output by _gain
 void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
 {
-       float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalAnticausalFilter_multGain(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a, _gain ), tbb::auto_partitioner());
+#else
+        float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
        //#pragma omp parallel for
        for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
        {
@@ -573,7 +584,7 @@ void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, u
 
                }
        }
-
+#endif
 }
 
 /////////////////////////////////////////
@@ -745,8 +756,8 @@ void BasicRetinaFilter::_spatiotemporalLPfilter_Irregular(float *inputOutputFram
 
        // launch the serie of 1D directional filters in order to compute the 2D low pass filter
        _horizontalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows());
-       _horizontalAnticausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows());
-       _verticalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns());
+       _horizontalAnticausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows(), &_progressiveSpatialConstant[0]);
+       _verticalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns(), &_progressiveSpatialConstant[0]);
        _verticalAnticausalFilter_Irregular_multGain(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns());
 
 }
@@ -765,8 +776,8 @@ void BasicRetinaFilter::_spatiotemporalLPfilter_Irregular(const float *inputFram
 
        // launch the serie of 1D directional filters in order to compute the 2D low pass filter
        _horizontalCausalFilter_Irregular_addInput(inputFrame, outputFrame, 0, (int)_filterOutput.getNBrows());
-       _horizontalAnticausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBrows());
-       _verticalCausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBcolumns());
+       _horizontalAnticausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBrows(), &_progressiveSpatialConstant[0]);
+       _verticalCausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBcolumns(), &_progressiveSpatialConstant[0]);
        _verticalAnticausalFilter_Irregular_multGain(outputFrame, 0, (int)_filterOutput.getNBcolumns());
 
 }
@@ -806,10 +817,13 @@ void BasicRetinaFilter::_horizontalCausalFilter_Irregular_addInput(const float *
 }
 
 //  horizontal anticausal filter  (basic way, no add on)
-void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
+void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer)
 {
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter_Irregular(outputFrame, spatialConstantBuffer, IDrowEnd, _filterOutput.getNBcolumns()), tbb::auto_partitioner());
+#else
        register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1;
-       register const float* spatialConstantPTR=&_progressiveSpatialConstant[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1;
+       register const float* spatialConstantPTR=spatialConstantBuffer+IDrowEnd*(_filterOutput.getNBcolumns())-1;
 
        for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
        {
@@ -820,18 +834,21 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame
                        *(outputPTR--) = result;
                }
        }
-
+#endif
 
 }
 
 //  vertical anticausal filter
-void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
+void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer)
 {
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter_Irregular(outputFrame, spatialConstantBuffer, _filterOutput.getNBrows(), _filterOutput.getNBcolumns()), tbb::auto_partitioner());
+#else
        for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
        {
                register float result=0;
                register float *outputPTR=outputFrame+IDcolumn;
-               register const float *spatialConstantPTR=&_progressiveSpatialConstant[0]+IDcolumn;
+               register const float *spatialConstantPTR=spatialConstantBuffer+IDcolumn;
                for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
                {
                        result = *(outputPTR) + *(spatialConstantPTR) * result;
@@ -840,6 +857,7 @@ void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsi
                        spatialConstantPTR+=_filterOutput.getNBcolumns();
                }
        }
+#endif
 }
 
 //  vertical anticausal filter which multiplies the output by _gain
index a8f61f1..c0cf100 100644 (file)
@@ -393,48 +393,261 @@ protected:
     // LP filter that squares the input and computes the output ONLY on the areas where the integrationAreas map are TRUE
     void _localSquaringSpatioTemporalLPfilter(const float *inputFrame, float *LPfilterOutput, const unsigned int *integrationAreas, const unsigned int filterIndex=0);
 
-    // local luminance adaptation of the input in regard of localLuminance buffer
-    void _localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame);
-    // local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
-    void _localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance);
-    // local adaptation applied on a range of values which can be positive and negative
-    void _localLuminanceAdaptationPosNegValues(const float *inputFrame, const float *localLuminance, float *outputFrame);
+       // local luminance adaptation of the input in regard of localLuminance buffer
+       void _localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame, const bool updateLuminanceMean=true);
+       // local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
+       void _localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance);
+       // local adaptation applied on a range of values which can be positive and negative
+       void _localLuminanceAdaptationPosNegValues(const float *inputFrame, const float *localLuminance, float *outputFrame);
 
 
     //////////////////////////////////////////////////////////////
     // 1D directional filters used for the 2D low pass filtering
 
-    // 1D filters with image input
-    void _horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
-    // 1D filters  with image input that is squared in the function
-    void _squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
-    //  vertical anticausal filter that returns the mean value of its result
-    float _verticalAnticausalFilter_returnMeanValue(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
-
-    // most simple functions: only perform 1D filtering with output=input (no add on)
-    void _horizontalCausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
-    void _horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
-    void _verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
-    void _verticalAnticausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
-
-    // perform 1D filtering with output with varrying spatial coefficient
-    void _horizontalCausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
-    void _horizontalCausalFilter_Irregular_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
-    void _horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
-    void _verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
-    void _verticalAnticausalFilter_Irregular_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
-
-
-    // 1D filters in which the output is multiplied by _gain
-    void _verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
-    void _horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
-
-    // LP filter on specific parts of the picture instead of all the image
-    // same functions (some of them) but take a binary flag to allow integration, false flag means, 0 at the output...
-    void _local_squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
-    void _local_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
-    void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas);
-    void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output
+       // 1D filters with image input
+       void _horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
+       // 1D filters  with image input that is squared in the function // parallelized with TBB
+       void _squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
+       //  vertical anticausal filter that returns the mean value of its result
+       float _verticalAnticausalFilter_returnMeanValue(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
+
+       // most simple functions: only perform 1D filtering with output=input (no add on)
+       void _horizontalCausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
+       void _horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);     // parallelized with TBB
+       void _verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);     // parallelized with TBB 
+       void _verticalAnticausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
+
+       // perform 1D filtering with output with varrying spatial coefficient
+       void _horizontalCausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
+       void _horizontalCausalFilter_Irregular_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
+       void _horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer);   // parallelized with TBB 
+       void _verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer);   // parallelized with TBB 
+       void _verticalAnticausalFilter_Irregular_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
+
+
+       // 1D filters in which the output is multiplied by _gain
+       void _verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output // parallelized with TBB
+       void _horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
+
+       // LP filter on specific parts of the picture instead of all the image
+       // same functions (some of them) but take a binary flag to allow integration, false flag means, 0 at the output...
+       void _local_squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
+       void _local_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
+       void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas);
+       void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output
+
+#ifdef HAVE_TBB
+/******************************************************
+** IF TBB is useable, then, main loops are parallelized using these functors
+** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
+** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
+** ==> functors constructors can differ from the parameters used with their related serial functions
+*/
+
+#define _DEBUG_TBB // define DEBUG_TBB in order to display additionnal data on stdout
+    class Parallel_horizontalAnticausalFilter
+    {
+    private:
+        float *outputFrame;
+        const unsigned int IDrowEnd, nbColumns;
+        const float filterParam_a;
+    public:
+        // constructor which takes the input image pointer reference reference and limits
+        Parallel_horizontalAnticausalFilter(float *bufferToProcess, const unsigned int idEnd, const unsigned int nbCols, const float a )
+        :outputFrame(bufferToProcess), IDrowEnd(idEnd), nbColumns(nbCols), filterParam_a(a)
+        {
+#ifdef DEBUG_TBB
+            std::cout<<"Parallel_horizontalAnticausalFilter::Parallel_horizontalAnticausalFilter :"
+               <<"\n\t idEnd="<<IDrowEnd
+               <<"\n\t nbCols="<<nbColumns
+               <<"\n\t filterParam="<<filterParam_a
+               <<std::endl;
+#endif
+        }
+
+        void operator()( const tbb::blocked_range<size_t>& r ) const {
+
+#ifdef DEBUG_TBB
+            std::cout<<"Parallel_horizontalAnticausalFilter::operator() :"
+               <<"\n\t range size="<<r.size()
+               <<"\n\t first index="<<r.begin()
+               //<<"\n\t last index="<<filterParam
+               <<std::endl;
+#endif
+            for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
+           {
+                register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1;
+               register float result=0;
+               for (unsigned int index=0; index<nbColumns; ++index)
+               {
+                       result = *(outputPTR)+  filterParam_a* result;
+                       *(outputPTR--) = result;
+               }
+            }
+    }
+    };
+
+    class Parallel_horizontalCausalFilter_addInput
+    {
+    private:
+        const float *inputFrame;
+        float *outputFrame;
+        const unsigned int IDrowStart, nbColumns;
+        const float filterParam_a, filterParam_tau;
+    public:
+       Parallel_horizontalCausalFilter_addInput(const float *bufferToAddAsInputProcess, float *bufferToProcess, const unsigned int idStart, const unsigned int nbCols,  const float a,  const float tau)
+        :inputFrame(bufferToAddAsInputProcess), outputFrame(bufferToProcess), IDrowStart(idStart), nbColumns(nbCols), filterParam_a(a), filterParam_tau(tau){}
+
+        void operator()( const tbb::blocked_range<size_t>& r ) const {
+           for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
+           {
+               register float* outputPTR=outputFrame+(IDrowStart+IDrow)*nbColumns;
+               register const float* inputPTR=inputFrame+(IDrowStart+IDrow)*nbColumns;
+               register float result=0;
+               for (unsigned int index=0; index<nbColumns; ++index)
+               {
+                       result = *(inputPTR++) + filterParam_tau**(outputPTR)+  filterParam_a* result;
+                       *(outputPTR++) = result;
+               }
+           }
+        }
+    };
+
+    class Parallel_verticalCausalFilter
+    {
+    private:
+        float *outputFrame;
+        const unsigned int nbRows, nbColumns;
+        const float filterParam_a;
+    public:
+        Parallel_verticalCausalFilter(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a )
+        :outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a){}
+        
+        void operator()( const tbb::blocked_range<size_t>& r ) const {
+            for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
+           {
+               register float result=0;
+               register float *outputPTR=outputFrame+IDcolumn;
+
+               for (unsigned int index=0; index<nbRows; ++index)
+               {
+                       result = *(outputPTR) + filterParam_a * result;
+                       *(outputPTR) = result;
+                       outputPTR+=nbColumns;
+
+               }
+           }
+        }
+    };
+
+    class Parallel_verticalAnticausalFilter_multGain
+    {
+    private:
+        float *outputFrame;
+        const unsigned int nbRows, nbColumns;
+        const float filterParam_a, filterParam_gain;
+    public:        
+        Parallel_verticalAnticausalFilter_multGain(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a, const float  gain)
+        :outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a), filterParam_gain(gain){}
+        
+        void operator()( const tbb::blocked_range<size_t>& r ) const {
+            float* offset=outputFrame+nbColumns*nbRows-nbColumns;
+           for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
+           {
+               register float result=0;
+               register float *outputPTR=offset+IDcolumn;
+
+               for (unsigned int index=0; index<nbRows; ++index)
+               {
+                       result = *(outputPTR) + filterParam_a * result;
+                       *(outputPTR) = filterParam_gain*result;
+                       outputPTR-=nbColumns;
+
+               }
+           }
+        }
+    };
+
+    class Parallel_localAdaptation
+    {
+    private:
+         const float *localLuminance, *inputFrame;
+         float *outputFrame;
+         const float localLuminanceFactor, localLuminanceAddon, maxInputValue;
+    public:
+         Parallel_localAdaptation(const float *localLum, const float *inputImg, float *bufferToProcess, const float localLuminanceFact, const float localLuminanceAdd, const float maxInputVal)
+         :localLuminance(localLum), inputFrame(inputImg),outputFrame(bufferToProcess), localLuminanceFactor(localLuminanceFact), localLuminanceAddon(localLuminanceAdd), maxInputValue(maxInputVal) {};
+
+         void operator()( const tbb::blocked_range<size_t>& r ) const {
+             const float *localLuminancePTR=localLuminance+r.begin();
+            const float *inputFramePTR=inputFrame+r.begin();
+            float *outputFramePTR=outputFrame+r.begin();
+            for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR)
+            {
+               float X0=*(localLuminancePTR++)*localLuminanceFactor+localLuminanceAddon;
+               // TODO : the following line can lead to a divide by zero ! A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values...               
+               *(outputFramePTR) = (maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001);
+               //std::cout<<"BasicRetinaFilter::inputFrame[IDpixel]=%f, X0=%f, outputFrame[IDpixel]=%f\n", inputFrame[IDpixel], X0, outputFrame[IDpixel]);
+            }
+         }
+    };
+
+//////////////////////////////////////////
+/// Specific filtering methods which manage non const spatial filtering parameter (used By retinacolor and LogProjectors) 
+    class Parallel_horizontalAnticausalFilter_Irregular
+    {
+    private:
+       float *outputFrame;
+        const float *spatialConstantBuffer;
+        const unsigned int IDrowEnd, nbColumns;
+    public:
+        Parallel_horizontalAnticausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int idEnd, const unsigned int nbCols)
+        :outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), IDrowEnd(idEnd), nbColumns(nbCols){}
+
+void operator()( const tbb::blocked_range<size_t>& r ) const {
+
+            for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
+           {
+                register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1;
+                register const float* spatialConstantPTR=spatialConstantBuffer+(IDrowEnd-IDrow)*(nbColumns)-1;
+               register float result=0;
+               for (unsigned int index=0; index<nbColumns; ++index)
+               {
+                       result = *(outputPTR)+  *(spatialConstantPTR--)* result;
+                       *(outputPTR--) = result;
+               }
+            }
+        }
+    };
+
+    class Parallel_verticalCausalFilter_Irregular
+    {
+    private:
+        float *outputFrame;
+        const float *spatialConstantBuffer;
+        const unsigned int nbRows, nbColumns;
+    public:
+        Parallel_verticalCausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int nbRws, const unsigned int nbCols)
+        :outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), nbRows(nbRws), nbColumns(nbCols){}
+        
+        void operator()( const tbb::blocked_range<size_t>& r ) const {
+            for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
+           {
+               register float result=0;
+               register float *outputPTR=outputFrame+IDcolumn;
+                register const float* spatialConstantPTR=spatialConstantBuffer+IDcolumn;
+               for (unsigned int index=0; index<nbRows; ++index)
+               {
+                       result = *(outputPTR) +  *(spatialConstantPTR) * result;
+                       *(outputPTR) = result;
+                       outputPTR+=nbColumns;
+                        spatialConstantPTR+=nbColumns;
+               }
+           }
+        }
+    };
+
+#endif
 
 };
 
index ea5fdf8..4626133 100644 (file)
@@ -153,6 +153,9 @@ void MagnoRetinaFilter::setCoefficientsTable(const float parasolCells_beta, cons
 
 void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float *OPL_OFF)
 {
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_amacrineCellsComputing(OPL_ON, OPL_OFF, &_previousInput_ON[0], &_previousInput_OFF[0], &_amacrinCellsTempOutput_ON[0], &_amacrinCellsTempOutput_OFF[0], _temporalCoefficient), tbb::auto_partitioner());
+#else
        register const float *OPL_ON_PTR=OPL_ON;
        register const float *OPL_OFF_PTR=OPL_OFF;
        register float *previousInput_ON_PTR= &_previousInput_ON[0];
@@ -175,6 +178,7 @@ void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float
                *(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++);
 
        }
+#endif
 }
 
 // launch filter that runs all the IPL filter
index b5c9015..daefb74 100644 (file)
@@ -190,10 +190,52 @@ private:
     // varialbles
     float _temporalCoefficient;
 
-    // amacrine cells filter : high pass temporal filter
-    void _amacrineCellsComputing(const float *ONinput, const float *OFFinput);
-
-
+       // amacrine cells filter : high pass temporal filter
+       void _amacrineCellsComputing(const float *ONinput, const float *OFFinput);
+#ifdef HAVE_TBB
+/******************************************************
+** IF TBB is useable, then, main loops are parallelized using these functors
+** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
+** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
+** ==> functors constructors can differ from the parameters used with their related serial functions
+*/
+    class Parallel_amacrineCellsComputing
+    {
+    private:
+       const float *OPL_ON, *OPL_OFF;
+        float *previousInput_ON, *previousInput_OFF, *amacrinCellsTempOutput_ON, *amacrinCellsTempOutput_OFF;
+       const float temporalCoefficient;
+    public:
+        Parallel_amacrineCellsComputing(const float *OPL_ON_PTR, const float *OPL_OFF_PTR, float *previousInput_ON_PTR, float *previousInput_OFF_PTR, float *amacrinCellsTempOutput_ON_PTR, float *amacrinCellsTempOutput_OFF_PTR, float temporalCoefficientVal)
+        :OPL_ON(OPL_ON_PTR), OPL_OFF(OPL_OFF_PTR), previousInput_ON(previousInput_ON_PTR), previousInput_OFF(previousInput_OFF_PTR), amacrinCellsTempOutput_ON(amacrinCellsTempOutput_ON_PTR), amacrinCellsTempOutput_OFF(amacrinCellsTempOutput_OFF_PTR), temporalCoefficient(temporalCoefficientVal) {}
+        
+        void operator()( const tbb::blocked_range<size_t>& r ) const {
+       register const float *OPL_ON_PTR=OPL_ON+r.begin();
+       register const float *OPL_OFF_PTR=OPL_OFF+r.begin();
+       register float *previousInput_ON_PTR= previousInput_ON+r.begin();
+       register float *previousInput_OFF_PTR= previousInput_OFF+r.begin();
+       register float *amacrinCellsTempOutput_ON_PTR= amacrinCellsTempOutput_ON+r.begin();
+       register float *amacrinCellsTempOutput_OFF_PTR= amacrinCellsTempOutput_OFF+r.begin();
+
+       for (unsigned int IDpixel=r.begin() ; IDpixel!=r.end(); ++IDpixel)
+       {
+
+               /* Compute ON and OFF amacrin cells high pass temporal filter */
+               float magnoXonPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_ON_PTR+ *OPL_ON_PTR-*previousInput_ON_PTR);
+               *(amacrinCellsTempOutput_ON_PTR++)=((float)(magnoXonPixelResult>0))*magnoXonPixelResult;
+
+               float magnoXoffPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_OFF_PTR+ *OPL_OFF_PTR-*previousInput_OFF_PTR);
+               *(amacrinCellsTempOutput_OFF_PTR++)=((float)(magnoXoffPixelResult>0))*magnoXoffPixelResult;
+
+               /* prepare next loop */
+               *(previousInput_ON_PTR++)=*(OPL_ON_PTR++);
+               *(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++);
+
+       }
+        }
+
+    };
+#endif
 };
 
 }
index a6cbf1b..50d1b0b 100644 (file)
@@ -199,17 +199,20 @@ const std::valarray<float> &ParvoRetinaFilter::runFilter(const std::valarray<flo
        return (*_parvocellularOutputONminusOFF);
 }
 
-void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
+void ParvoRetinaFilter::_OPL_OnOffWaysComputing() // WARNING : this method requires many buffer accesses, parallelizing can increase bandwith & core efficacy
 {
        // loop that makes the difference between photoreceptor cells output and horizontal cells
        // positive part goes on the ON way, negative pat goes on the OFF way
-       register float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0];
-       register float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0];
-       register float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0];
-       register float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0];
-       register float *parvocellularOutputON_PTR= &_parvocellularOutputON[0];
-       register float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0];
 
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_OPL_OnOffWaysComputing(&_photoreceptorsOutput[0], &_horizontalCellsOutput[0], &_bipolarCellsOutputON[0], &_bipolarCellsOutputOFF[0], &_parvocellularOutputON[0], &_parvocellularOutputOFF[0]), tbb::auto_partitioner());
+#else
+       float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0];
+       float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0];
+       float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0];
+       float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0];
+       float *parvocellularOutputON_PTR= &_parvocellularOutputON[0];
+       float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0];
        // compute bipolar cells response equal to photoreceptors minus horizontal cells response
        // and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result
        for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel)
@@ -222,6 +225,7 @@ void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
                *(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference;
                *(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference;
        }
+#endif
 }
 }
 
index 54521b1..76f5506 100644 (file)
@@ -216,6 +216,45 @@ private:
        // private functions
        void _OPL_OnOffWaysComputing();
 
+#ifdef HAVE_TBB
+/******************************************************
+** IF TBB is useable, then, main loops are parallelized using these functors
+** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
+** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
+** ==> functors constructors can differ from the parameters used with their related serial functions
+*/
+    class Parallel_OPL_OnOffWaysComputing
+    {
+    private:
+       float *photoreceptorsOutput, *horizontalCellsOutput, *bipolarCellsON, *bipolarCellsOFF, *parvocellularOutputON, *parvocellularOutputOFF;
+    public:
+        Parallel_OPL_OnOffWaysComputing(float *photoreceptorsOutput_PTR, float *horizontalCellsOutput_PTR, float *bipolarCellsON_PTR, float *bipolarCellsOFF_PTR, float *parvocellularOutputON_PTR, float *parvocellularOutputOFF_PTR)
+        :photoreceptorsOutput(photoreceptorsOutput_PTR), horizontalCellsOutput(horizontalCellsOutput_PTR), bipolarCellsON(bipolarCellsON_PTR), bipolarCellsOFF(bipolarCellsOFF_PTR), parvocellularOutputON(parvocellularOutputON_PTR), parvocellularOutputOFF(parvocellularOutputOFF_PTR) {}
+        
+        void operator()( const tbb::blocked_range<size_t>& r ) const {
+           // compute bipolar cells response equal to photoreceptors minus horizontal cells response
+           // and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result
+           float *photoreceptorsOutput_PTR= photoreceptorsOutput+r.begin();
+           float *horizontalCellsOutput_PTR= horizontalCellsOutput+r.begin();
+           float *bipolarCellsON_PTR = bipolarCellsON+r.begin();
+           float *bipolarCellsOFF_PTR = bipolarCellsOFF+r.begin();
+           float *parvocellularOutputON_PTR= parvocellularOutputON+r.begin();
+           float *parvocellularOutputOFF_PTR= parvocellularOutputOFF+r.begin();
+
+            for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel)
+           {
+               float pixelDifference = *(photoreceptorsOutput_PTR++) -*(horizontalCellsOutput_PTR++);
+               // test condition to allow write pixelDifference in ON or OFF buffer and 0 in the over
+               float isPositive=(float) (pixelDifference>0.0f);
+
+               // ON and OFF channels writing step
+               *(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference;
+               *(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference;
+           }
+        }
+    };
+#endif
+
 };
 }
 #endif
index f98b050..b452956 100644 (file)
@@ -89,7 +89,7 @@ RetinaColor::RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns
  _demultiplexedColorFrame(NBrows*NBcolumns*3),
  _chrominance(NBrows*NBcolumns*3),
  _colorLocalDensity(NBrows*NBcolumns*3),
- _imageGradient(NBrows*NBcolumns*3)
+ _imageGradient(NBrows*NBcolumns*2)
 {
        // link to parent buffers (let's recycle !)
        _luminance=&_filterOutput;
@@ -126,12 +126,12 @@ RetinaColor::~RetinaColor()
 void RetinaColor::clearAllBuffers()
 {
        BasicRetinaFilter::clearAllBuffers();
-       _tempMultiplexedFrame=0;
-       _demultiplexedTempBuffer=0;
+       _tempMultiplexedFrame=0.f;
+       _demultiplexedTempBuffer=0.f;
 
-       _demultiplexedColorFrame=0;
-       _chrominance=0;
-       _imageGradient=1;
+       _demultiplexedColorFrame=0.f;
+       _chrominance=0.f;
+       _imageGradient=0.57f;
 }
 
 /**
@@ -149,7 +149,7 @@ void RetinaColor::resize(const unsigned int NBrows, const unsigned int NBcolumns
        _demultiplexedColorFrame.resize(NBrows*NBcolumns*3);
        _chrominance.resize(NBrows*NBcolumns*3);
        _colorLocalDensity.resize(NBrows*NBcolumns*3);
-       _imageGradient.resize(NBrows*NBcolumns*3);
+       _imageGradient.resize(NBrows*NBcolumns*2);
 
        // link to parent buffers (let's recycle !)
        _luminance=&_filterOutput;
@@ -325,15 +325,15 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed
 
        }else
        {
-               register const float *multiplexedColorFramePTR1= get_data(multiplexedColorFrame);
-               for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR1)
+               register const float *multiplexedColorFramePTR= get_data(multiplexedColorFrame);
+               for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR)
                {
                        // normalize by photoreceptors density
                        float Cr=*(chrominancePTR)*_colorLocalDensity[indexc];
                        float Cg=*(chrominancePTR+_filterOutput.getNBpixels())*_colorLocalDensity[indexc+_filterOutput.getNBpixels()];
                        float Cb=*(chrominancePTR+_filterOutput.getDoubleNBpixels())*_colorLocalDensity[indexc+_filterOutput.getDoubleNBpixels()];
                        *luminance=(Cr+Cg+Cb)*_pG;
-                       _demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR1 - *luminance;
+                       _demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR - *luminance;
 
                }
 
@@ -349,8 +349,9 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed
                _adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getNBpixels());
                _adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getDoubleNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getDoubleNBpixels());
 
-               for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait �tre supprimee en passant la densit� � la fonction de filtrage
-                       _demultiplexedColorFrame[index] /= _chrominance[index];
+/*             for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait �tre supprimee en passant la densit� � la fonction de filtrage
+                       _demultiplexedColorFrame[index] /= _chrominance[index];*/
+               _demultiplexedColorFrame/=_chrominance; // more optimal ;o)
 
                // compute and substract the residual luminance
                for (unsigned int index=0; index<_filterOutput.getNBpixels() ; ++index)
@@ -432,6 +433,9 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const
        if (inputOutputBuffer==NULL)
                inputOutputBuffer= &_demultiplexedColorFrame[0];
 
+#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method
+        tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()*3), Parallel_clipBufferValues<float>(inputOutputBuffer, 0, maxInputValue), tbb::auto_partitioner());
+#else
        register float *inputOutputBufferPTR=inputOutputBuffer;
        for (register unsigned int jf = 0; jf < _filterOutput.getNBpixels()*3; ++jf, ++inputOutputBufferPTR)
        {
@@ -440,6 +444,7 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const
                else if (*inputOutputBufferPTR<0)
                        *inputOutputBufferPTR=0;
        }
+#endif
        //std::cout<<"RetinaColor::...normalizing RGB frame OK"<<std::endl;
 }
 
@@ -535,8 +540,8 @@ void RetinaColor::_applyRIFfilter(const float *sourceBuffer, float *destinationB
 
 void RetinaColor::_getNormalizedContoursImage(const float *inputFrame, float *outputFrame)
 {
-       float maxValue=0;
-       float normalisationFactor=1.f/3;
+       float maxValue=0.f;
+       float normalisationFactor=1.f/3.f;
        for (unsigned int indexr=1 ; indexr<_filterOutput.getNBrows()-1; ++indexr)
        {
                for (unsigned int indexc=1 ; indexc<_filterOutput.getNBcolumns()-1; ++indexc)
@@ -564,19 +569,23 @@ void RetinaColor::_adaptiveSpatialLPfilter(const float *inputFrame, float *outpu
        _gain = (1-0.57f)*(1-0.57f)*(1-0.06f)*(1-0.06f);
 
        // launch the serie of 1D directional filters in order to compute the 2D low pass filter
+       // -> horizontal filters work with the first layer of imageGradient
        _adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, 0, _filterOutput.getNBrows());
-       _adaptiveHorizontalAnticausalFilter(outputFrame, 0, _filterOutput.getNBrows());
-       _adaptiveVerticalCausalFilter(outputFrame, 0, _filterOutput.getNBcolumns());
+       _horizontalAnticausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBrows(), &_imageGradient[0]);
+       // -> horizontal filters work with the second layer of imageGradient
+       _verticalCausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBcolumns(), &_imageGradient[0]+_filterOutput.getNBpixels());
        _adaptiveVerticalAnticausalFilter_multGain(outputFrame, 0, _filterOutput.getNBcolumns());
-
 }
 
-//  horizontal causal filter which adds the input inside
+//  horizontal causal filter which adds the input inside... replaces the parent _horizontalCausalFilter_Irregular_addInput by avoiding a product for each pixel
 void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
 {
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, &_imageGradient[0], _filterOutput.getNBcolumns()), tbb::auto_partitioner());
+#else
        register float* outputPTR=outputFrame+IDrowStart*_filterOutput.getNBcolumns();
        register const float* inputPTR=inputFrame+IDrowStart*_filterOutput.getNBcolumns();
-       register float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns();
+       register const float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns();
        for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
        {
                register float result=0;
@@ -589,51 +598,17 @@ void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFra
                }
                //        std::cout<<" "<<std::endl;
        }
+#endif
 }
 
-//  horizontal anticausal filter  (basic way, no add on)
-void RetinaColor::_adaptiveHorizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
-{
-       register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1;
-       register float *imageGradientPTR= &_imageGradient[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1;
-
-       for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
-       {
-               register float result=0;
-               for (unsigned int index=0; index<_filterOutput.getNBcolumns(); ++index)
-               {
-                       result = *(outputPTR)+  (*imageGradientPTR)* result;
-                       *(outputPTR--) = result;
-                       --imageGradientPTR;
-               }
-       }
-}
-
-//  vertical anticausal filter
-void RetinaColor::_adaptiveVerticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
-{
-       for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
-       {
-               register float result=0;
-               register float *outputPTR=outputFrame+IDcolumn;
-               register float *imageGradientPTR= &_imageGradient[0]+IDcolumn;
-
-               for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
-               {
-                       result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result;
-                       *(outputPTR) = result;
-                       outputPTR+=_filterOutput.getNBcolumns();
-                       imageGradientPTR+=_filterOutput.getNBcolumns();
-
-               }
-       }
-}
-
-//  vertical anticausal filter which multiplies the output by _gain
+//  vertical anticausal filter which multiplies the output by _gain... replaces the parent _verticalAnticausalFilter_multGain by avoiding a product for each pixel and taking into account the second layer of the _imageGradient buffer
 void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
 {
+#ifdef HAVE_TBB
+        tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_adaptiveVerticalAnticausalFilter_multGain(outputFrame, &_imageGradient[0]+_filterOutput.getNBpixels(), _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _gain), tbb::auto_partitioner());
+#else
        float* outputOffset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
-       float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
+       float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()*2-_filterOutput.getNBcolumns();
 
        for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
        {
@@ -642,12 +617,13 @@ void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame,
                register float *imageGradientPTR=gradOffset+IDcolumn;
                for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
                {
-                       result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result;
+                       result = *(outputPTR) + (*(imageGradientPTR)) * result;
                        *(outputPTR) = _gain*result;
                        outputPTR-=_filterOutput.getNBcolumns();
                        imageGradientPTR-=_filterOutput.getNBcolumns();
                }
        }
+#endif
 }
 
 ///////////////////////////
index 622f9a0..ec5d060 100644 (file)
@@ -248,9 +248,7 @@ protected:
        void _getNormalizedContoursImage(const float *inputFrame, float *outputFrame);
        // -> special adaptive filters dedicated to low pass filtering on the chrominance (skeeps filtering on the edges)
        void _adaptiveSpatialLPfilter(const float *inputFrame,  float *outputFrame);
-       void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd);
-       void _adaptiveHorizontalAnticausalFilter(float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd);
-       void _adaptiveVerticalCausalFilter(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd);
+       void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd); // TBB parallelized
        void _adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd);
        void _computeGradient(const float *luminance);
        void _normalizeOutputs_0_maxOutputValue(void);
@@ -258,6 +256,84 @@ protected:
        // color space transform
        void _applyImageColorSpaceConversion(const std::valarray<float> &inputFrame, std::valarray<float> &outputFrame, const float *transformTable);
 
+#ifdef HAVE_TBB
+/******************************************************
+** IF TBB is useable, then, main loops are parallelized using these functors
+** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
+** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
+** ==> functors constructors can differ from the parameters used with their related serial functions
+*/
+
+/* Template :
+    class 
+    {
+    private:
+
+    public:
+         Parallel_()
+         : {}
+    
+         void operator()( const tbb::blocked_range<size_t>& r ) const {
+
+        }
+    }:
+*/
+    class Parallel_adaptiveHorizontalCausalFilter_addInput
+    {
+    private:
+       float *outputFrame;
+       const float *inputFrame, *imageGradient;
+       const unsigned int nbColumns;
+    public:
+         Parallel_adaptiveHorizontalCausalFilter_addInput(const float *inputImg, float *bufferToProcess, const float *imageGrad, const unsigned int nbCols)
+         :outputFrame(bufferToProcess), inputFrame(inputImg), imageGradient(imageGrad), nbColumns(nbCols) {};
+    
+         void operator()( const tbb::blocked_range<size_t>& r ) const {
+            register float* outputPTR=outputFrame+r.begin()*nbColumns;
+           register const float* inputPTR=inputFrame+r.begin()*nbColumns;
+           register const float *imageGradientPTR= imageGradient+r.begin()*nbColumns;
+           for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
+           {
+               register float result=0;
+               for (unsigned int index=0; index<nbColumns; ++index)
+               {
+                       result = *(inputPTR++) + (*imageGradientPTR++)* result;
+                       *(outputPTR++) = result;
+               }
+           }
+        }
+    };
+
+    class Parallel_adaptiveVerticalAnticausalFilter_multGain
+    {
+    private:
+        float *outputFrame;
+       const float *imageGradient;
+        const unsigned int nbRows, nbColumns;
+        const float filterParam_gain;
+    public:        
+        Parallel_adaptiveVerticalAnticausalFilter_multGain(float *bufferToProcess, const float *imageGrad, const unsigned int nbRws, const unsigned int nbCols, const float  gain)
+        :outputFrame(bufferToProcess), imageGradient(imageGrad), nbRows(nbRws), nbColumns(nbCols), filterParam_gain(gain){}
+        
+        void operator()( const tbb::blocked_range<size_t>& r ) const {
+            float* offset=outputFrame+nbColumns*nbRows-nbColumns;
+            const float* gradOffset= imageGradient+nbColumns*nbRows-nbColumns;
+           for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
+           {
+               register float result=0;
+               register float *outputPTR=offset+IDcolumn;
+               register const float *imageGradientPTR=gradOffset+IDcolumn;
+               for (unsigned int index=0; index<nbRows; ++index)
+               {
+                       result = *(outputPTR) + *(imageGradientPTR) * result;
+                       *(outputPTR) = filterParam_gain*result;
+                       outputPTR-=nbColumns;
+                       imageGradientPTR-=nbColumns;
+               }
+           }
+        }
+    };
+#endif
 };
 }
 
index 06a519e..b119b95 100644 (file)
 #include <iostream>
 #include <cmath>
 
+
+//// If TBB is used
+// ==> then include required includes 
+#ifdef HAVE_TBB
+#include "tbb/parallel_for.h"
+#include "tbb/blocked_range.h"
+
+// ==> declare usefull generic tools
+template <class type>
+class Parallel_clipBufferValues
+{
+private:
+    type *bufferToClip;
+    const type minValue, maxValue;
+    
+public:
+    Parallel_clipBufferValues(type* bufferToProcess, const type min, const type max)
+    : bufferToClip(bufferToProcess), minValue(min), maxValue(max){}
+
+    void operator()( const tbb::blocked_range<size_t>& r ) const {
+       register type *inputOutputBufferPTR=bufferToClip+r.begin();
+        for (register unsigned int jf = r.begin(); jf != r.end(); ++jf, ++inputOutputBufferPTR)
+       {
+           if (*inputOutputBufferPTR>maxValue)
+               *inputOutputBufferPTR=maxValue;
+           else if (*inputOutputBufferPTR<minValue)
+               *inputOutputBufferPTR=minValue;
+       }
+    }
+};
+#endif
+
 //#define __TEMPLATEBUFFERDEBUG //define TEMPLATEBUFFERDEBUG in order to display debug information
 
 namespace cv
@@ -351,21 +383,25 @@ public:
             }
         }
 
-        std::cout<<"Tdebug"<<std::endl;
-        std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl;
-        std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl;
-        std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl;
-        // clipping values outside than the updated thresholds
-        bufferPTR=this->Buffer();
-        for (unsigned int i=0;i<this->size();++i, ++bufferPTR)
-        {
-            if (*bufferPTR<updatedLowValue)
-                *bufferPTR=updatedLowValue;
-            else if (*bufferPTR>updatedHighValue)
-                *bufferPTR=updatedHighValue;
-        }
-
-        normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue);
+               std::cout<<"Tdebug"<<std::endl;
+               std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl;
+               std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl;
+               std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl;
+               // clipping values outside than the updated thresholds
+                bufferPTR=this->Buffer();
+#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method
+                tbb::parallel_for(tbb::blocked_range<size_t>(0,this->size()), Parallel_clipBufferValues<type>(bufferPTR, updatedLowValue, updatedHighValue), tbb::auto_partitioner());
+#else
+
+               for (unsigned int i=0;i<this->size();++i, ++bufferPTR)
+               {
+                       if (*bufferPTR<updatedLowValue)
+                               *bufferPTR=updatedLowValue;
+                       else if (*bufferPTR>updatedHighValue)
+                               *bufferPTR=updatedHighValue;
+               }
+#endif
+               normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue);
 
     }