fixed 2 bugs in the recently modified Lapack functions
authorVadim Pisarevsky <no@email>
Tue, 31 Aug 2010 12:39:00 +0000 (12:39 +0000)
committerVadim Pisarevsky <no@email>
Tue, 31 Aug 2010 12:39:00 +0000 (12:39 +0000)
3rdparty/lapack/dgemv_custom.c
3rdparty/lapack/sgemv_custom.c
modules/core/src/lapack.cpp

index 6edee4e..70b5ef4 100644 (file)
                 if( s == 0. )
                     continue;
                 s *= alpha;
-                for( j = 0; j <= m - 2; j += 2 )
+                for( j = 0; j <= m - 4; j += 4 )
                 {
                     doublereal t0 = y[j] + s*a[j];
                     doublereal t1 = y[j+1] + s*a[j+1];
                     y[j] = t0; y[j+1] = t1;
+                    t0 = y[j+2] + s*a[j+2];
+                    t1 = y[j+3] + s*a[j+3];
+                    y[j+2] = t0; y[j+3] = t1;
                 }
                 
                 for( ; j < m; j++ )
             for( i = 0; i < n; i++, a += lda )
             {
                 doublereal s = 0;
-                for( j = 0; j <= m - 2; j += 2 )
-                    s += x[j]*a[j] + x[j+1]*a[j+1];
+                for( j = 0; j <= m - 4; j += 4 )
+                    s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
                 for( ; j < m; j++ )
                     s += x[j]*a[j];
                 y[i*incy] += alpha*s;
index 0cb7a97..eff4418 100644 (file)
         ;
     else if( trans == 'N' )
     {
-        for( i = 0; i < n; i++, a += lda )
+        if( incy == 1 )
         {
-            real s = x[i*incx];
-            if( s == 0.f )
-                continue;
-            s *= alpha;
-            
-            for( j = 0; j <= m - 4; j += 4 )
+            for( i = 0; i < n; i++, a += lda )
             {
-                real t0 = y[j] + s*a[j];
-                real t1 = y[j+1] + s*a[j+1];
-                y[j] = t0; y[j+1] = t1;
-                t0 = y[j+2] + s*a[j+2];
-                t1 = y[j+3] + s*a[j+3];
-                y[j+2] = t0; y[j+3] = t1;
-            }
+                real s = x[i*incx];
+                if( s == 0.f )
+                    continue;
+                s *= alpha;
+                
+                for( j = 0; j <= m - 4; j += 4 )
+                {
+                    real t0 = y[j] + s*a[j];
+                    real t1 = y[j+1] + s*a[j+1];
+                    y[j] = t0; y[j+1] = t1;
+                    t0 = y[j+2] + s*a[j+2];
+                    t1 = y[j+3] + s*a[j+3];
+                    y[j+2] = t0; y[j+3] = t1;
+                }
 
-            for( ; j < m; j++ )
-                y[j] += s*a[j];
+                for( ; j < m; j++ )
+                    y[j] += s*a[j];
+            }
+        }
+        else
+        {
+            for( i = 0; i < n; i++, a += lda )
+            {
+                real s = x[i*incx];
+                if( s == 0. )
+                    continue;
+                s *= alpha;
+                for( j = 0; j < m; j++ )
+                    y[j*incy] += s*a[j];
+            }
         }
     }
     else
     {
-        for( i = 0; i < n; i++, a += lda )
+        if( incx == 1 )
         {
-            real s = 0;
-            for( j = 0; j <= m - 4; j += 4 )
-                s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
-            for( ; j < m; j++ )
-                s += x[j]*a[j];
-            y[i*incy] += alpha*s;
+            for( i = 0; i < n; i++, a += lda )
+            {
+                real s = 0;
+                for( j = 0; j <= m - 4; j += 4 )
+                    s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
+                for( ; j < m; j++ )
+                    s += x[j]*a[j];
+                y[i*incy] += alpha*s;
+            }
+        }
+        else
+        {
+            for( i = 0; i < n; i++, a += lda )
+            {
+                real s = 0;
+                for( j = 0; j < m; j++ )
+                    s += x[j*incx]*a[j];
+                y[i*incy] += alpha*s;
+            }
         }
     }
     
index 6d9a876..7a95fb4 100644 (file)
@@ -1327,6 +1327,7 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags )
         if(u) u->release();
         if(vt) vt->release();
         u = vt = 0;
+        compute_uv = false;
     }
     
     if( compute_uv )
@@ -1417,13 +1418,13 @@ static void _SVDcompute( const Mat& a, Mat& w, Mat* u, Mat* vt, int flags )
     if( type == CV_32F )
     {
         sgesdd_(mode, &n, &m, (float*)_a.data, &lda, (float*)w.data,
-                (float*)vt->data, &ldv, (float*)u->data, &ldu,
+                vt ? (float*)vt->data : (float*)&v1, &ldv, u ? (float*)u->data : (float*)&u1, &ldu,
                 (float*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info );
     }
     else
     {
         dgesdd_(mode, &n, &m, (double*)_a.data, &lda, (double*)w.data,
-                (double*)vt->data, &ldv, (double*)u->data, &ldu,
+                vt ? (double*)vt->data : &v1, &ldv, u ? (double*)u->data : &u1, &ldu,
                 (double*)(buffer + work_ofs), &lwork, (integer*)(buffer + iwork_ofs), &info );
     }
     CV_Assert(info >= 0);