optimize memory usage

author marina.kolpakova <marina.kolpakova@itseez.com>

Thu, 27 Sep 2012 08:44:06 +0000 (12:44 +0400)

committer marina.kolpakova <marina.kolpakova@itseez.com>

Sat, 10 Nov 2012 01:05:43 +0000 (05:05 +0400)
author marina.kolpakova <marina.kolpakova@itseez.com>
Thu, 27 Sep 2012 08:44:06 +0000 (12:44 +0400)
committer marina.kolpakova <marina.kolpakova@itseez.com>
Sat, 10 Nov 2012 01:05:43 +0000 (05:05 +0400)
diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu

index 714bdfa..c8dff34 100644 (file)
--- a/modules/gpu/src/cuda/isf-sc.cu
+++ b/modules/gpu/src/cuda/isf-sc.cu
@@ -42,18 +42,17 @@
  
  #include <opencv2/gpu/device/common.hpp>
  #include <icf.hpp>
-// #include <opencv2/gpu/device/saturate_cast.hpp>
  #include <stdio.h>
-// #include <float.h>
+#include <float.h>
  
-// //#define LOG_CUDA_CASCADE
+// #define LOG_CUDA_CASCADE
  
-// #if defined LOG_CUDA_CASCADE
-// # define dprintf(format, ...) \
-//             do { printf(format, __VA_ARGS__); } while (0)
-// #else
-// # define dprintf(format, ...)
-// #endif
+#if defined LOG_CUDA_CASCADE
+# define dprintf(format, ...) \
+            do { printf(format, __VA_ARGS__); } while (0)
+#else
+# define dprintf(format, ...)
+#endif
  
  namespace cv { namespace gpu { namespace device {
  namespace icf {
@@ -94,32 +93,128 @@ namespace icf {
          cudaSafeCall( cudaDeviceSynchronize() );
      }
  
-    texture<float2,  cudaTextureType1D, cudaReadModeElementType> tnode;
+    texture<int,  cudaTextureType2D, cudaReadModeElementType> thogluv;
+    // ToDo: do it in load time
+    // __device__ __forceinline__ float rescale(const Level& level, uchar4& scaledRect, const Node& node)
+    // {
+    //     scaledRect = node.rect;
+    //     return (float)(node.threshold & 0x0FFFFFFFU);
+    // }
+
+    __device__ __forceinline__ float rescale(const Level& level, uchar4& scaledRect, const Node& node)
+    {
+        float relScale = level.relScale;
+        float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
+
+        dprintf("feature %d box %d %d %d %d\n", (node.threshold >> 28), scaledRect.x, scaledRect.y,
+            scaledRect.z, scaledRect.w);
+        dprintf("rescale: %f [%f %f] selected %f\n",level.relScale, level.scaling[0], level.scaling[1],
+            level.scaling[(node.threshold >> 28) > 6]);
+
+        // rescale
+        scaledRect.x = __float2int_rn(relScale * scaledRect.x);
+        scaledRect.y = __float2int_rn(relScale * scaledRect.y);
+        scaledRect.z = __float2int_rn(relScale * scaledRect.z);
+        scaledRect.w = __float2int_rn(relScale * scaledRect.w);
+
+        float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
+
+        float approx = 1.f;
+        // if (fabs(farea - 0.f) > FLT_EPSILON && fabs(farea - 0.f) > FLT_EPSILON)
+        {
+            const float expected_new_area = farea * relScale * relScale;
+            approx =  sarea / expected_new_area;
+        }
+
+        dprintf("new rect: %d box %d %d %d %d  rel areas %f %f\n", (node.threshold >> 28),
+        scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
+
+
+        float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx;
+        rootThreshold *= level.scaling[(node.threshold >> 28) > 6];
+
+        dprintf("approximation %f %d -> %f %f\n", approx, (node.threshold & 0x0FFFFFFFU), rootThreshold,
+            level.scaling[(node.threshold >> 28) > 6]);
+
+        return rootThreshold;
+    }
+
+    __device__ __forceinline__ int get(const int x, int y, int channel, uchar4 area)
+    {
+
+        dprintf("feature box %d %d %d %d ", area.x, area.y, area.z, area.w);
+        dprintf("get for channel %d\n", channel);
+        dprintf("extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",
+            x + area.x, y + area.y,  x + area.z, y + area.y,  x + area.z,y + area.w,
+            x + area.x, y + area.w);
+        dprintf("at point %d %d with offset %d\n", x, y, 0);
+
+        int offset = channel * 121;
+        y += offset;
+
+        int a = tex2D(thogluv, x + area.x, y + area.y);
+        int b = tex2D(thogluv, x + area.z, y + area.y);
+        int c = tex2D(thogluv, x + area.z, y + area.w);
+        int d = tex2D(thogluv, x + area.x, y + area.w);
+
+        dprintf("    retruved integral values: %d %d %d %d\n", a, b, c, d);
+
+        return (a - b + c - d);
+    }
+
      __global__ void test_kernel(const Level* levels, const Octave* octaves, const float* stages,
-        const Node* nodes,
-        PtrStepSz<uchar4> objects)
+        const Node* nodes, const float* leaves, PtrStepSz<uchar4> objects)
      {
          const int y = blockIdx.y * blockDim.y + threadIdx.y;
          const int x = blockIdx.x * blockDim.x + threadIdx.x;
          Level level = levels[blockIdx.z];
+
+        // if (x > 0 || y > 0 || blockIdx.z > 0) return;
          if(x >= level.workRect.x || y >= level.workRect.y) return;
  
          Octave octave = octaves[level.octave];
+
          int st = octave.index * octave.stages;
          const int stEnd = st + 1000;//octave.stages;
  
          float confidence = 0.f;
  
-#pragma unroll 8
+// #pragma unroll 8
          for(; st < stEnd; ++st)
          {
+            dprintf("\n\nstage: %d\n", st);
              const int nId = st * 3;
-            const Node node = nodes[nId];
+            Node node = nodes[nId];
+
+            dprintf("Node: [%d %d %d %d] %d %d\n", node.rect.x, node.rect.y, node.rect.z, node.rect.w,
+                node.threshold >> 28, node.threshold & 0x0FFFFFFFU);
+
+            float threshold = rescale(level, node.rect, node);
+            int sum = get(x, y, (node.threshold >> 28), node.rect);
+
+            dprintf("Node: [%d %d %d %d] %f\n", node.rect.x, node.rect.y, node.rect.z,
+                node.rect.w, threshold);
+
+            int next = 1 + (int)(sum >= threshold);
+            dprintf("go: %d (%d >= %f)\n\n" ,next, sum, threshold);
+
+            node = nodes[nId + next];
+            threshold = rescale(level, node.rect, node);
+            sum = get(x, y, (node.threshold >> 28), node.rect);
  
-            const float stage = stages[st];
-            confidence += node.rect.x * stage;
+            const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
+            float impact = leaves[st * 4 + lShift];
+            confidence += impact;
+
+            if (confidence <= stages[st]) st = stEnd + 1;
+            dprintf("decided: %d (%d >= %f) %d %f\n\n" ,next, sum, threshold, lShift, impact);
+            dprintf("extracted stage: %f\n", stages[st]);
+            dprintf("computed  score: %f\n\n", confidence);
          }
  
+        // if (st == stEnd)
+        //     printf("%d %d %d\n", x, y, st);
+
          uchar4 val;
          val.x = (int)confidence;
          if (x == y) objects(0, threadIdx.x) = val;
@@ -127,188 +222,27 @@ namespace icf {
      }
  
      void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
-        const PtrStepSzb& nodes, const PtrStepSzb& features,
-        PtrStepSz<uchar4> objects)
+        const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects)
      {
          int fw = 160;
          int fh = 120;
+
          dim3 block(32, 8);
          dim3 grid(fw / 32, fh / 8, 47);
+
          const Level* l = (const Level*)levels.ptr();
          const Octave* oct = ((const Octave*)octaves.ptr());
          const float* st = (const float*)stages.ptr();
          const Node* nd = (const Node*)nodes.ptr();
-        // cudaSafeCall( cudaBindTexture(0, tnode, nodes.data, rgb.cols / size) );
+        const float* lf = (const float*)leaves.ptr();
+
+        cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
+        cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
  
-        test_kernel<<<grid, block>>>(l, oct, st, nd, objects);
+        test_kernel<<<grid, block>>>(l, oct, st, nd, lf, objects);
  
          cudaSafeCall( cudaGetLastError());
          cudaSafeCall( cudaDeviceSynchronize());
      }
  }
-}}}
-
-// __global__ void detect(const cv::gpu::icf::Cascade cascade, const int* __restrict__ hogluv, const int pitch,
-//     PtrStepSz<uchar4> objects)
-// {
-//     cascade.detectAt(hogluv, pitch, objects);
-// }
-
-// }
-
-// float __device icf::Cascade::rescale(const icf::Level& level, uchar4& scaledRect,
-//                                      const int channel, const float threshold) const
-// {
-//     dprintf("feature %d box %d %d %d %d\n", channel, scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w);
-//     dprintf("rescale: %f [%f %f]\n",level.relScale, level.scaling[0], level.scaling[1]);
-
-//     float relScale = level.relScale;
-//     float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
-
-//     // rescale
-//     scaledRect.x = __float2int_rn(relScale * scaledRect.x);
-//     scaledRect.y = __float2int_rn(relScale * scaledRect.y);
-//     scaledRect.z = __float2int_rn(relScale * scaledRect.z);
-//     scaledRect.w = __float2int_rn(relScale * scaledRect.w);
-
-//     float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
-
-
-//     float approx = 1.f;
-//     if (fabs(farea - 0.f) > FLT_EPSILON && fabs(farea - 0.f) > FLT_EPSILON)
-//     {
-//         const float expected_new_area = farea * relScale * relScale;
-//         approx = expected_new_area / sarea;
-//     }
-
-//     dprintf("new rect: %d box %d %d %d %d  rel areas %f %f\n", channel,
-//         scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
-
-//     // compensation areas rounding
-//     float rootThreshold = threshold / approx;
-//     // printf("    approx %f\n", rootThreshold);
-//     rootThreshold *= level.scaling[(int)(channel > 6)];
-
-//     dprintf("approximation %f %f -> %f %f\n", approx, threshold, rootThreshold, level.scaling[(int)(channel > 6)]);
-
-//     return rootThreshold;
-// }
-
-// typedef unsigned char uchar;
-// float __device get(const int* __restrict__ hogluv, const int pitch,
-//                    const int x, const int y, int channel, uchar4 area)
-// {
-//     dprintf("feature box %d %d %d %d ", area.x, area.y, area.z, area.w);
-//     dprintf("get for channel %d\n", channel);
-//     dprintf("extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",
-//         x + area.x, y + area.y,  x + area.z, y + area.y,  x + area.z,y + area.w,
-//         x + area.x, y + area.w);
-//     dprintf("at point %d %d with offset %d\n", x, y, 0);
-
-//     const int* curr = hogluv + ((channel * 121) + y) * pitch;
-
-//     int a = curr[area.y * pitch + x + area.x];
-//     int b = curr[area.y * pitch + x + area.z];
-//     int c = curr[area.w * pitch + x + area.z];
-//     int d = curr[area.w * pitch + x + area.x];
-
-//     dprintf("    retruved integral values: %d %d %d %d\n", a, b, c, d);
-
-//     return (a - b + c - d);
-// }
-
-
-// void __device icf::Cascade::detectAt(const int* __restrict__ hogluv, const int pitch,
-//                                     PtrStepSz<uchar4>& objects) const
-// {
-//     const icf::Level* lls = (const icf::Level*)levels.ptr();
-
-//     const int y = blockIdx.y * blockDim.y + threadIdx.y;
-//     const int x = blockIdx.x * blockDim.x + threadIdx.x;
-//     // if (x > 0 || y > 0) return;
-
-//     Level level = lls[blockIdx.z];
-//     if (x >= level.workRect.x || y >= level.workRect.y) return;
-
-//     dprintf("level: %d (%f %f) [%f %f] (%d %d) (%d %d)\n", level.octave, level.relScale, level.shrScale,
-//         level.scaling[0], level.scaling[1], level.workRect.x, level.workRect.y, level.objSize.x, level.objSize.y);
-
-//     const Octave octave = ((const Octave*)octaves.ptr())[level.octave];
-//     // printf("Octave: %d %d %d (%d %d) %f\n", octave.index, octave.stages,
-//     //     octave.shrinkage, octave.size.x, octave.size.y, octave.scale);
-
-//     const int stBegin = octave.index * octave.stages, stEnd = stBegin + octave.stages;
-
-//     float detectionScore = 0.f;
-
-//     int st = stBegin;
-//     for(; st < stEnd; ++st)
-//     {
-//         const float stage = stages(0, st);
-//         dprintf("Stage: %f\n", stage);
-//         {
-//             const int nId = st * 3;
-
-//             // work with root node
-//             const Node node = ((const Node*)nodes.ptr())[nId];
-
-//             dprintf("Node: %d %f\n", node.feature, node.threshold);
-
-//             const Feature feature = ((const Feature*)features.ptr())[node.feature];
-
-//             uchar4 scaledRect = feature.rect;
-//             float threshold = rescale(level, scaledRect, feature.channel, node.threshold);
-
-//             float sum = get(hogluv,pitch, x, y, feature.channel, scaledRect);
-
-//             dprintf("root feature %d %f\n",feature.channel, sum);
-
-//             int next = 1 + (int)(sum >= threshold);
-
-//             dprintf("go: %d (%f >= %f)\n\n" ,next, sum, threshold);
-
-//             // leaves
-//             const Node leaf = ((const Node*)nodes.ptr())[nId + next];
-//             const Feature fLeaf = ((const Feature*)features.ptr())[leaf.feature];
-
-//             scaledRect = fLeaf.rect;
-//             threshold = rescale(level, scaledRect, fLeaf.channel, leaf.threshold);
-//             sum = get(hogluv, pitch, x, y, fLeaf.channel, scaledRect);
-
-//             const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
-//             float impact = leaves(0, (st * 4) + lShift);
-
-//             detectionScore += impact;
-
-//             dprintf("decided: %d (%f >= %f) %d %f\n\n" ,next, sum, threshold, lShift, impact);
-//             dprintf("extracted stage:\n");
-//             dprintf("ct %f\n", stage);
-//             dprintf("computed score %f\n\n", detectionScore);
-//             dprintf("\n\n");
-//         }
-
-//         if (detectionScore <= stage || st - stBegin == 100) break;
-//     }
-
-//     dprintf("x %d y %d: %d\n", x, y, st - stBegin);
-
-//     if (st == stEnd)
-//     {
-//         uchar4 a;
-//         a.x = level.workRect.x;
-//         a.y = level.workRect.y;
-//         objects(0, threadIdx.x) = a;
-//     }
-// }
-
-// void icf::Cascade::detect(const cv::gpu::PtrStepSzi& hogluv, PtrStepSz<uchar4> objects, cudaStream_t stream) const
-// {
-//     dim3 block(32, 8, 1);
-//     dim3 grid(ChannelStorage::FRAME_WIDTH / 32, ChannelStorage::FRAME_HEIGHT / 8, 47);
-//     device::detect<<<grid, block, 0, stream>>>(*this, hogluv, hogluv.step / sizeof(int), objects);
-//     cudaSafeCall( cudaGetLastError() );
-//     if (!stream)
-//         cudaSafeCall( cudaDeviceSynchronize() );
-// }
-
-// }}
-\ No newline at end of file
+}}}
+\ No newline at end of file
diff --git a/modules/gpu/src/icf.hpp b/modules/gpu/src/icf.hpp

index 51ea2c0..ecd1886 100644 (file)
--- a/modules/gpu/src/icf.hpp
+++ b/modules/gpu/src/icf.hpp
@@ -40,11 +40,13 @@
  //
  //M
  
-#include <opencv2/gpu/device/common.hpp>
  
  #ifndef __OPENCV_ICF_HPP__
  #define __OPENCV_ICF_HPP__
  
+#include <opencv2/gpu/device/common.hpp>
+#include <stdio.h>
+
  // #if defined __CUDACC__
  // # define __device __device__ __forceinline__
  // #else
@@ -92,20 +94,27 @@ struct __align__(8) Level //is actually 24 bytes
  
  struct __align__(8) Node
  {
-    // int feature;
      uchar4 rect;
-    float threshold;
+    // ushort channel;
+    uint threshold;
  
-    Node(const uchar4 c, const int t) : rect(c), threshold(t) {}
+    enum { THRESHOLD_MASK = 0x0FFFFFFF };
+
+    Node(const uchar4 r, const uint ch, const uint t) : rect(r), threshold(t + (ch << 28))
+    {
+        // printf("%d\n", t);
+        // printf("[%d %d %d %d] %d, %d\n",rect.x, rect.y, rect.z, rect.w, (int)(threshold >> 28),
+        //     (int)(0x0FFFFFFF & threshold));
+    }
  };
  
-struct __align__(8) Feature
-{
-    int channel;
-    uchar4 rect;
+// struct __align__(8) Feature
+// {
+//     int channel;
+//     uchar4 rect;
  
-    Feature(const int c, const uchar4 r) : channel(c), rect(r) {}
-};
+//     Feature(const int c, const uchar4 r) : channel(c), rect(r) {}
+// };
  }
  }}}
  // struct Cascade
diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp

index 8d75176..ffbf380 100644 (file)
--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp
@@ -60,19 +60,10 @@ namespace icf {
      void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
          const int fw, const int fh, const int bins);
      void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
-        const PtrStepSzb& nodes, const PtrStepSzb& features,
-        PtrStepSz<uchar4> objects);
+        const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects);
  }
  }}}
  
-// namespace {
-//     char *itoa(long i, char* s, int /*dummy_radix*/)
-//     {
-//         sprintf(s, "%ld", i);
-//         return s;
-//     }
-// }
-
  struct cv::gpu::SoftCascade::Filds
  {
  
@@ -97,7 +88,6 @@ struct cv::gpu::SoftCascade::Filds
      GpuMat stages;
      GpuMat nodes;
      GpuMat leaves;
-    GpuMat features;
      GpuMat levels;
  
      // preallocated buffer 640x480x10 for hogluv + 640x480 got gray
@@ -137,7 +127,7 @@ struct cv::gpu::SoftCascade::Filds
      bool fill(const FileNode &root, const float mins, const float maxs);
      void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
      {
-        device::icf::detect(levels, octaves, stages, nodes, features, objects);
+        device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects);
      }
  
  private:
@@ -216,10 +206,9 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
          if (fn.empty()) return false;
  
      std::vector<Octave>  voctaves;
-    std::vector<float>        vstages;
+    std::vector<float>   vstages;
      std::vector<Node>    vnodes;
-    std::vector<float>        vleaves;
-    std::vector<Feature> vfeatures;
+    std::vector<float>   vleaves;
      scales.clear();
  
      FileNodeIterator it = fn.begin(), it_end = fn.end();
@@ -245,6 +234,8 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
          FileNode ffs = fns[SC_FEATURES];
          if (ffs.empty()) return false;
  
+        FileNodeIterator ftrs = ffs.begin();
+
          fns = fns[SC_STAGES];
          if (fn.empty()) return false;
  
@@ -263,10 +254,21 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
                  FileNodeIterator inIt = fns.begin(), inIt_end = fns.end();
                  for (; inIt != inIt_end;)
                  {
-                    int feature = (int)(*(inIt +=2)++) + feature_offset;
-                    float th = (float)(*(inIt++));
+                    // int feature = (int)(*(inIt +=2)) + feature_offset;
+                    inIt +=3;
+                    // extract feature, Todo:check it
+                    uint th = saturate_cast<uint>((float)(*(inIt++)));
+                    cv::FileNode ftn = (*ftrs)[SC_F_RECT];
+                    cv::FileNodeIterator r_it = ftn.begin();
                      uchar4 rect;
-                    vnodes.push_back(Node(rect, th));
+                    rect.x = saturate_cast<uchar>((int)*(r_it++));
+                    rect.y = saturate_cast<uchar>((int)*(r_it++));
+                    rect.z = saturate_cast<uchar>((int)*(r_it++));
+                    rect.w = saturate_cast<uchar>((int)*(r_it++));
+
+                    uint channel = saturate_cast<uint>((int)(*ftrs)[SC_F_CHANNEL]);
+                    vnodes.push_back(Node(rect, channel, th));
+                    ++ftrs;
                  }
  
                  fns = (*ftr)[SC_LEAF];
@@ -276,19 +278,6 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
              }
          }
  
-        st = ffs.begin(), st_end = ffs.end();
-        for (; st != st_end; ++st )
-        {
-            cv::FileNode rn = (*st)[SC_F_RECT];
-            cv::FileNodeIterator r_it = rn.begin();
-            uchar4 rect;
-            rect.x = saturate_cast<uchar>((int)*(r_it++));
-            rect.y = saturate_cast<uchar>((int)*(r_it++));
-            rect.z = saturate_cast<uchar>((int)*(r_it++));
-            rect.w = saturate_cast<uchar>((int)*(r_it++));
-            vfeatures.push_back(Feature((int)(*st)[SC_F_CHANNEL], rect));
-        }
-
          feature_offset += octave.stages * 3;
          ++octIndex;
      }
@@ -306,9 +295,6 @@ inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float
      leaves.upload(cv::Mat(vleaves).reshape(1,1));
      CV_Assert(!leaves.empty());
  
-    features.upload(cv::Mat(1, vfeatures.size() * sizeof(Feature), CV_8UC1, (uchar*)&(vfeatures[0]) ));
-    CV_Assert(!features.empty());
-
      // compute levels
      calcLevels(voctaves, FRAME_WIDTH, FRAME_HEIGHT, TOTAL_SCALES);
      CV_Assert(!levels.empty());
@@ -425,7 +411,14 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c
      return true;
  }
  
-// #define USE_REFERENCE_VALUES
+#define USE_REFERENCE_VALUES
+namespace {
+    char *itoa(long i, char* s, int /*dummy_radix*/)
+    {
+        sprintf(s, "%ld", i);
+        return s;
+    }
+}
  void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
                                  GpuMat& objects, const int /*rejectfactor*/, Stream s)
  {
@@ -438,17 +431,20 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
      Filds& flds = *filds;
  
  #if defined USE_REFERENCE_VALUES
-//     cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
-//     cv::FileStorage imgs("/home/kellan/testInts.xml", cv::FileStorage::READ);
-//     char buff[33];
-
-//     for(int i = 0; i < Filds::HOG_LUV_BINS; ++i)
-//     {
-//         cv::Mat channel;
-//         imgs[std::string("channel") + itoa(i, buff, 10)] >> channel;
-//         GpuMat gchannel(flds.hogluv, cv::Rect(0, 121 * i, 161, 121));
-//         gchannel.upload(channel);
-//     }
+    cudaMemset(flds.hogluv.data, 0, flds.hogluv.step * flds.hogluv.rows);
+
+    cv::FileStorage imgs("/home/kellan/testInts.xml", cv::FileStorage::READ);
+    char buff[33];
+
+    for(int i = 0; i < Filds::HOG_LUV_BINS; ++i)
+    {
+        cv::Mat channel;
+        imgs[std::string("channel") + itoa(i, buff, 10)] >> channel;
+
+        // std::cout << "channel " << i << std::endl << channel << std::endl;
+        GpuMat gchannel(flds.hogluv, cv::Rect(0, 121 * i, 161, 121));
+        gchannel.upload(channel);
+    }
  #else
      GpuMat& plane = flds.plane;
      GpuMat& shrunk = flds.shrunk;
author	marina.kolpakova <marina.kolpakova@itseez.com>
	Thu, 27 Sep 2012 08:44:06 +0000 (12:44 +0400)
committer	marina.kolpakova <marina.kolpakova@itseez.com>
	Sat, 10 Nov 2012 01:05:43 +0000 (05:05 +0400)
modules/gpu/src/cuda/isf-sc.cu		patch \| blob \| history
modules/gpu/src/icf.hpp		patch \| blob \| history
modules/gpu/src/softcascade.cpp		patch \| blob \| history