From: lluis
Date: Tue, 17 Sep 2013 21:45:23 +0000 (+0200)
Subject: Added erGrouping function: Find groups of Extremal Regions that are organized as...
X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~3757^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2837bfd9fa13e6f2716b9a2198af6c4546b64cc6;p=platform%2Fupstream%2Fopencv.git

Added erGrouping function: Find groups of Extremal Regions that are organized as text blocks.

Updated samples/cpp to use the complete text detection pipeline
---

diff --git a/modules/objdetect/include/opencv2/objdetect/erfilter.hpp b/modules/objdetect/include/opencv2/objdetect/erfilter.hpp
index 69809a8..bbf1b54 100644
--- a/modules/objdetect/include/opencv2/objdetect/erfilter.hpp
+++ b/modules/objdetect/include/opencv2/objdetect/erfilter.hpp
@@ -236,5 +236,28 @@ enum { ERFILTER_NM_RGBLGrad = 0,
 */
 CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);

+
+/*!
+    Find groups of Extremal Regions that are organized as text blocks. This function implements
+    the grouping algorithm described in:
+    Gomez L. and Karatzas D.: Multi-script Text Extraction from Natural Scenes, ICDAR 2013.
+    Notice that this implementation constrains the results to horizontally-aligned text and
+    Latin script (since the ERFilter classifiers are trained only for Latin script detection).
+
+    The algorithm combines two different clustering techniques in a single parameter-free procedure
+    to detect groups of regions organized as text. The maximally meaningful groups are first detected
+    in several feature spaces, where each feature space is a combination of proximity information
+    (x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.),
+    thus providing a set of text group hypotheses. An Evidence Accumulation framework is then used
+    to combine all these hypotheses into the final estimate. Each resulting group is finally
+    validated heuristically in order to assess whether it forms a valid horizontally-aligned text block.
+
+    \param src Vector of single channel images CV_8UC1 from which the regions were extracted.
+    \param regions Vector of ERs retrieved from the ERFilter algorithm for each channel.
+    \param groups The output of the algorithm is stored in this parameter as a list of rectangles.
+*/
+CV_EXPORTS void erGrouping(InputArrayOfArrays src, std::vector<std::vector<ERStat> > &regions,
+                           std::vector<Rect> &groups);
+
 }

 #endif // _OPENCV_ERFILTER_HPP_
diff --git a/modules/objdetect/src/erfilter.cpp b/modules/objdetect/src/erfilter.cpp
index 6e19b34..ce07443 100644
--- a/modules/objdetect/src/erfilter.cpp
+++ b/modules/objdetect/src/erfilter.cpp
@@ -43,6 +43,11 @@
 #include "precomp.hpp"
 #include

+#ifndef INT32_MAX
+#define __STDC_LIMIT_MACROS
+#include <stdint.h>
+#endif
+
 using namespace std;

 namespace cv
@@ -1249,4 +1254,1926 @@ void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode
         gradient_magnitude.copyTo(channelGrad);
     }
 }
+
+
+
+/* ------------------------------------------------------------------------------------*/
+/* -------------------------------- ER Grouping Algorithm -----------------------------*/
+/* ------------------------------------------------------------------------------------*/
+
+
+/* NFA approximation functions */
+
+// ln(10)
+#ifndef M_LN10
+#define M_LN10 2.30258509299404568401799145468436421
+#endif
+// Doubles relative error factor
+#define RELATIVE_ERROR_FACTOR 100.0
+
+// Compare doubles by relative error.
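+// A quick sanity-check example (illustrative values, not part of the algorithm):
+// 0.1 + 0.2 and 0.3 differ by ~5.6e-17 as IEEE-754 doubles, so == compares them
+// unequal, but their relative error (~1.9e-16) is far below
+// RELATIVE_ERROR_FACTOR * DBL_EPSILON (~2.2e-14), so double_equal() accepts them as equal.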
+static int double_equal(double a, double b) +{ + double abs_diff,aa,bb,abs_max; + + /* trivial case */ + if( a == b ) return true; + + abs_diff = fabs(a-b); + aa = fabs(a); + bb = fabs(b); + abs_max = aa > bb ? aa : bb; + + /* DBL_MIN is the smallest normalized number, thus, the smallest + number whose relative error is bounded by DBL_EPSILON. For + smaller numbers, the same quantization steps as for DBL_MIN + are used. Then, for smaller numbers, a meaningful "relative" + error should be computed by dividing the difference by DBL_MIN. */ + if( abs_max < DBL_MIN ) abs_max = DBL_MIN; + + /* equal if relative error <= factor x eps */ + return (abs_diff / abs_max) <= (RELATIVE_ERROR_FACTOR * DBL_EPSILON); +} + + +/* + Computes the natural logarithm of the absolute value of + the gamma function of x using the Lanczos approximation. + See http://www.rskey.org/gamma.htm +*/ +static double log_gamma_lanczos(double x) +{ + static double q[7] = { 75122.6331530, 80916.6278952, 36308.2951477, + 8687.24529705, 1168.92649479, 83.8676043424, + 2.50662827511 }; + double a = (x+0.5) * log(x+5.5) - (x+5.5); + double b = 0.0; + int n; + + for(n=0;n<7;n++) + { + a -= log( x + (double) n ); + b += q[n] * pow( x, (double) n ); + } + return a + log(b); +} + +/* + Computes the natural logarithm of the absolute value of + the gamma function of x using Windschitl method. + See http://www.rskey.org/gamma.htm +*/ +static double log_gamma_windschitl(double x) +{ + return 0.918938533204673 + (x-0.5)*log(x) - x + + 0.5*x*log( x*sinh(1/x) + 1/(810.0*pow(x,6.0)) ); +} + +/* + Computes the natural logarithm of the absolute value of + the gamma function of x. When x>15 use log_gamma_windschitl(), + otherwise use log_gamma_lanczos(). +*/ +#define log_gamma(x) ((x)>15.0?log_gamma_windschitl(x):log_gamma_lanczos(x)) + +// Size of the table to store already computed inverse values. +#define TABSIZE 100000 + +/* + Computes -log10(NFA). + NFA stands for Number of False Alarms: +*/ +static double NFA(int n, int k, double p, double logNT) +{ + static double inv[TABSIZE]; /* table to keep computed inverse values */ + double tolerance = 0.1; /* an error of 10% in the result is accepted */ + double log1term,term,bin_term,mult_term,bin_tail,err,p_term; + int i; + + if (p<=0) + p=0.000000000000000000000000000001; + if (p>=1) + p=0.999999999999999999999999999999; + + /* check parameters */ + if( n<0 || k<0 || k>n || p<=0.0 || p>=1.0 ) + { + CV_Error(CV_StsBadArg, "erGrouping wrong n, k or p values in NFA call!"); + } + + /* trivial cases */ + if( n==0 || k==0 ) return -logNT; + if( n==k ) return -logNT - (double) n * log10(p); + + /* probability term */ + p_term = p / (1.0-p); + + /* compute the first term of the series */ + log1term = log_gamma( (double) n + 1.0 ) - log_gamma( (double) k + 1.0 ) + - log_gamma( (double) (n-k) + 1.0 ) + + (double) k * log(p) + (double) (n-k) * log(1.0-p); + term = exp(log1term); + + /* in some cases no more computations are needed */ + if( double_equal(term,0.0) ) /* the first term is almost zero */ + { + if( (double) k > (double) n * p ) /* at begin or end of the tail? 
*/
+            return -log1term / M_LN10 - logNT; /* end: use just the first term */
+        else
+            return -logNT;                     /* begin: the tail is roughly 1 */
+    }
+
+    /* compute more terms if needed */
+    bin_tail = term;
+    for(i=k+1;i<=n;i++)
+    {
+        bin_term = (double) (n-i+1) * ( i<TABSIZE ?
+                   ( inv[i]!=0.0 ? inv[i] : ( inv[i] = 1.0 / (double) i ) ) :
+                   1.0 / (double) i );
+
+        mult_term = bin_term * p_term;
+        term *= mult_term;
+        bin_tail += term;
+        if (bin_term<1.0)
+        {
+            err = term * ( ( 1.0 - pow( mult_term, (double) (n-i+1) ) ) /
+                           (1.0-mult_term) - 1.0 );
+            if ( err < tolerance * fabs(-log10(bin_tail)-logNT) * bin_tail ) break;
+        }
+    }
+    return -log10(bin_tail) - logNT;
+}
+
+
+// Minibox : smallest enclosing box of a set of n-dimensional points
+class Minibox {
+private:
+    vector<float> edge_begin;
+    vector<float> edge_end;
+    bool initialized;
+
+public:
+    // creates an empty box
+    Minibox();
+
+    // copies p to the internal point set
+    void check_in (vector<float> *p);
+
+    // returns the volume of the box
+    long double volume();
+};
+
+Minibox::Minibox()
+{
+    initialized = false;
+}
+
+void Minibox::check_in (vector<float> *p)
+{
+    if(!initialized) for (int i=0; i<(int)p->size(); i++)
+    {
+        edge_begin.push_back(p->at(i));
+        edge_end.push_back(p->at(i)+0.00000000000000001f);
+        initialized = true;
+    }
+    else for (int i=0; i<(int)p->size(); i++)
+    {
+        edge_begin.at(i) = min(p->at(i),edge_begin.at(i));
+        edge_end.at(i) = max(p->at(i),edge_end.at(i));
+    }
+}
+
+long double Minibox::volume ()
+{
+    long double volume_ = 1;
+    for (int i=0; i<(int)edge_begin.size(); i++)
+    {
+        volume_ = volume_ * (edge_end.at(i) - edge_begin.at(i));
+    }
+    return (volume_);
+}
+
+
+#define MAX_NUM_FEATURES 9
+
+
+/* Hierarchical Clustering classes and functions */
+
+
+// Hierarchical Clustering linkage variants
+enum method_codes
+{
+    METHOD_METR_SINGLE = 0,
+    METHOD_METR_AVERAGE = 1
+};
+
+#ifndef INT32_MAX
+#define MAX_INDEX 0x7fffffffL
+#else
+#define MAX_INDEX INT32_MAX
+#endif
+
+// A node in the hierarchical clustering algorithm
+struct node {
+    int_fast32_t node1, node2;
+    double dist;
+
+    inline friend bool operator< (const node a, const node b)
+    {
+        // Numbers are always smaller than NaNs.
+        return a.dist < b.dist || (a.dist==a.dist && b.dist!=b.dist);
+    }
+};
+
+// self-destructing array pointer
+template <class type>
+class auto_array_ptr {
+private:
+    type * ptr;
+public:
+    auto_array_ptr() { ptr = NULL; }
+    template <class index>
+    auto_array_ptr(index const size) { init(size); }
+    template <class index, class value>
+    auto_array_ptr(index const size, value const val) { init(size, val); }
+
+    ~auto_array_ptr()
+    {
+        delete [] ptr;
+    }
+    void free() {
+        delete [] ptr;
+        ptr = NULL;
+    }
+    template <class index>
+    void init(index const size)
+    {
+        ptr = new type [size];
+    }
+    template <class index, class value>
+    void init(index const size, value const val)
+    {
+        init(size);
+        for (index i=0; i<size; i++) ptr[i] = val;
+    }
+    inline operator type *() const { return ptr; }
+};
+
+// The result of the hierarchical clustering algorithm
+class cluster_result {
+private:
+    auto_array_ptr<node> Z;
+    int_fast32_t pos;
+
+public:
+    cluster_result(const int_fast32_t size): Z(size)
+    {
+        pos = 0;
+    }
+
+    void append(const int_fast32_t node1, const int_fast32_t node2, const double dist)
+    {
+        Z[pos].node1 = node1;
+        Z[pos].node2 = node2;
+        Z[pos].dist  = dist;
+        pos++;
+    }
+
+    node * operator[] (const int_fast32_t idx) const { return Z + idx; }
+
+    void sqrt() const
+    {
+        for (int_fast32_t i=0; i<pos; i++)
+        {
+            Z[i].dist = ::sqrt(Z[i].dist);
+        }
+    }
+};
+
+// Class for a doubly linked list
+class doubly_linked_list {
+public:
+    auto_array_ptr<int_fast32_t> succ;
+
+private:
+    auto_array_ptr<int_fast32_t> pred;
+
+public:
+    doubly_linked_list(const int_fast32_t size): succ(size+1), pred(size+1)
+    {
+        for (int_fast32_t i=0; i<size; i++)
+        {
+            pred[i+1] = i;
+            succ[i] = i+1;
+        }
+    }
+
+    void remove(const int_fast32_t idx)
+    {
+        // Remove an index from the list.
+        succ[pred[idx]] = succ[idx];
+        pred[succ[idx]] = pred[idx];
+    }
+};
+
+// Indexing functions
+// D is the upper triangular part of a symmetric (NxN)-matrix, stored condensed
+// We require r_ < c_ !
+#define D_(r_,c_) ( D[(static_cast<std::ptrdiff_t>(2*N-3-(r_))*(r_)>>1)+(c_)-1] )
+// Z is an ((N-1)x4)-array
+#define Z_(_r, _c) (Z[(_r)*4 + (_c)])
+
+/*
+  Lookup function for a union-find data structure.
+
+  The function finds the root of idx by going iteratively through all
+  parent elements until a root is found. An element i is a root if
+  parent[i] is zero. To make subsequent searches faster, the entry for
+  idx and all its parents is updated with the root element.
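+
+  A small worked example (hypothetical labels, size = 3, so new cluster
+  labels start at 3): Union(0,1) sets parent[0] = parent[1] = 3, and
+  Union(3,2) sets parent[3] = parent[2] = 4. Find(0) then walks
+  0 -> 3 -> 4, returns 4, and rewrites parent[0] = 4 on the way out, so
+  the next Find(0) resolves in a single step.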
+*/ +class union_find { +private: + auto_array_ptr parent; + int_fast32_t nextparent; + +public: + void init(const int_fast32_t size) + { + parent.init(2*size-1, 0); + nextparent = size; + } + + int_fast32_t Find (int_fast32_t idx) const + { + if (parent[idx] !=0 ) // a -> b + { + int_fast32_t p = idx; + idx = parent[idx]; + if (parent[idx] !=0 ) // a -> b -> c + { + do + { + idx = parent[idx]; + } while (parent[idx] != 0); + do + { + int_fast32_t tmp = parent[p]; + parent[p] = idx; + p = tmp; + } while (parent[p] != idx); + } + } + return idx; + } + + void Union (const int_fast32_t node1, const int_fast32_t node2) + { + parent[node1] = parent[node2] = nextparent++; + } +}; + + +/* Functions for the update of the dissimilarity array */ + +inline static void f_single( double * const b, const double a ) +{ + if (*b > a) *b = a; +} +inline static void f_average( double * const b, const double a, const double s, const double t) +{ + *b = s*a + t*(*b); +} + +/* + This is the NN-chain algorithm. + + N: integer + D: condensed distance matrix N*(N-1)/2 + Z2: output data structure +*/ +template +static void NN_chain_core(const int_fast32_t N, double * const D, t_members * const members, cluster_result & Z2) +{ + int_fast32_t i; + + auto_array_ptr NN_chain(N); + int_fast32_t NN_chain_tip = 0; + + int_fast32_t idx1, idx2; + + double size1, size2; + doubly_linked_list active_nodes(N); + + double min; + + for (int_fast32_t j=0; jidx2) + { + int_fast32_t tmp = idx1; + idx1 = idx2; + idx2 = tmp; + } + + //if ( method == METHOD_METR_AVERAGE ) + { + size1 = static_cast(members[idx1]); + size2 = static_cast(members[idx2]); + members[idx2] += members[idx1]; + } + + // Remove the smaller index from the valid indices (active_nodes). + active_nodes.remove(idx1); + + switch (method) { + case METHOD_METR_SINGLE: + /* + Single linkage. + */ + // Update the distance matrix in the range [start, idx1). + for (i=active_nodes.start; i +static void MST_linkage_core_vector(const int_fast32_t N, + t_dissimilarity & dist, + cluster_result & Z2) { +/* + Hierarchical clustering using the minimum spanning tree + + N: integer, number of data points + dist: function pointer to the metric + Z2: output data structure +*/ + int_fast32_t i; + int_fast32_t idx2; + doubly_linked_list active_nodes(N); + auto_array_ptr d(N); + + int_fast32_t prev_node; + double min; + + // first iteration + idx2 = 1; + min = d[1] = dist(0,1); + for (i=2; min!=min && i tmp) + d[i] = tmp; + if (d[i] < min) + { + min = d[i]; + idx2 = i; + } + } + Z2.append(prev_node, idx2, min); + } +} + +class linkage_output { +private: + double * Z; + int_fast32_t pos; + +public: + linkage_output(double * const _Z) + { + this->Z = _Z; + pos = 0; + } + + void append(const int_fast32_t node1, const int_fast32_t node2, const double dist, const double size) + { + if (node1(node1); + Z[pos++] = static_cast(node2); + } else { + Z[pos++] = static_cast(node2); + Z[pos++] = static_cast(node1); + } + Z[pos++] = dist; + Z[pos++] = size; + } +}; + + +/* + Generate the specific output format for a dendrogram from the + clustering output. + + The list of merging steps can be sorted or unsorted. +*/ + +// The size of a node is either 1 (a single point) or is looked up from +// one of the clusters. +#define size_(r_) ( ((r_node1); + node2 = nodes.Find(Z2[i]->node2); + // Merge the nodes in the union-find data structure by making them + // children of a new node. 
+ nodes.Union(node1, node2); + output.append(node1, node2, Z2[i]->dist, size_(node1)+size_(node2)); + } +} + +/* + Clustering on vector data +*/ + +enum { + // metrics + METRIC_EUCLIDEAN = 0, + METRIC_CITYBLOCK = 1, + METRIC_SEUCLIDEAN = 2, + METRIC_SQEUCLIDEAN = 3 +}; + +/* + This class handles all the information about the dissimilarity + computation. +*/ +class dissimilarity { +private: + double * Xa; + auto_array_ptr Xnew; + std::ptrdiff_t dim; // size_t saves many statis_cast<> in products + int_fast32_t N; + int_fast32_t * members; + void (cluster_result::*postprocessfn) (const double) const; + double postprocessarg; + + double (dissimilarity::*distfn) (const int_fast32_t, const int_fast32_t) const; + + auto_array_ptr precomputed; + double * precomputed2; + + double * V; + const double * V_data; + +public: + dissimilarity (double * const _Xa, int _Num, int _dim, + int_fast32_t * const _members, + const unsigned char method, + const unsigned char metric, + bool temp_point_array) + : Xa(_Xa), + dim(_dim), + N(_Num), + members(_members), + postprocessfn(NULL), + V(NULL) + { + switch (method) { + case METHOD_METR_SINGLE: // only single linkage allowed here but others may come... + default: + postprocessfn = NULL; // default + switch (metric) + { + case METRIC_EUCLIDEAN: + set_euclidean(); + break; + case METRIC_SEUCLIDEAN: + case METRIC_SQEUCLIDEAN: + distfn = &dissimilarity::sqeuclidean; + break; + case METRIC_CITYBLOCK: + set_cityblock(); + break; + } + } + + if (temp_point_array) + { + Xnew.init((N-1)*dim); + } + } + + ~dissimilarity() + { + free(V); + } + + inline double operator () (const int_fast32_t i, const int_fast32_t j) const + { + return (this->*distfn)(i,j); + } + + inline double X (const int_fast32_t i, const int_fast32_t j) const + { + return Xa[i*dim+j]; + } + + inline bool Xb (const int_fast32_t i, const int_fast32_t j) const + { + return reinterpret_cast(Xa)[i*dim+j]; + } + + inline double * Xptr(const int_fast32_t i, const int_fast32_t j) const + { + return Xa+i*dim+j; + } + + void postprocess(cluster_result & Z2) const + { + if (postprocessfn!=NULL) + { + (Z2.*postprocessfn)(postprocessarg); + } + } + + double sqeuclidean(const int_fast32_t i, const int_fast32_t j) const + { + double sum = 0; + double const * Pi = Xa+i*dim; + double const * Pj = Xa+j*dim; + for (int_fast32_t k=0; k=1); + CV_Assert(N <= MAX_INDEX/4); + + try + { + + cluster_result Z2(N-1); + auto_array_ptr members; + // The distance update formula needs the number of data points in a cluster. + members.init(N, 1); + NN_chain_core(N, D, members, Z2); + generate_dendrogram(Z, Z2, N); + + } // try + catch (const std::bad_alloc&) + { + CV_Error(CV_StsNoMem, "Not enough Memory for erGrouping hierarchical clustering structures!"); + } + catch(const std::exception&) + { + CV_Error(CV_StsError, "Uncaught exception in erGrouping!"); + } + catch(...) 
+ { + CV_Error(CV_StsError, "C++ exception (unknown reason) in erGrouping!"); + } + return 0; + +} + +/*Clustering for the "stored data approach": the input are points in a vector space.*/ +static int linkage_vector(double *X, int N, int dim, double * Z, unsigned char method, unsigned char metric) +{ + + CV_Assert(N >=1); + CV_Assert(N <= MAX_INDEX/4); + CV_Assert(dim >=1); + + try + { + cluster_result Z2(N-1); + auto_array_ptr members; + dissimilarity dist(X, N, dim, members, method, metric, false); + MST_linkage_core_vector(N, dist, Z2); + dist.postprocess(Z2); + generate_dendrogram(Z, Z2, N); + } // try + catch (const std::bad_alloc&) + { + CV_Error(CV_StsNoMem, "Not enough Memory for erGrouping hierarchical clustering structures!"); + } + catch(const std::exception&) + { + CV_Error(CV_StsError, "Uncaught exception in erGrouping!"); + } + catch(...) + { + CV_Error(CV_StsError, "C++ exception (unknown reason) in erGrouping!"); + } + return 0; +} + + +/* Maximal Meaningful Clusters Detection */ + +struct HCluster{ + int num_elem; // number of elements + vector elements; // elements (contour ID) + int nfa; // the number of false alarms for this merge + float dist; // distance of the merge + float dist_ext; // distamce where this merge will merge with another + long double volume; // volume of the bounding sphere (or bounding box) + long double volume_ext; // volume of the sphere(or box) + envolvent empty space + vector > points; // nD points in this cluster + bool max_meaningful; // is this merge max meaningul ? + vector max_in_branch; // otherwise which merges are the max_meaningful in this branch + int min_nfa_in_branch; // min nfa detected within the chilhood + int node1; + int node2; +}; + +class MaxMeaningfulClustering +{ +public: + unsigned char method_; + unsigned char metric_; + + /// Constructor. + MaxMeaningfulClustering(unsigned char method, unsigned char metric){ method_=method; metric_=metric; }; + + void operator()(double *data, unsigned int num, int dim, unsigned char method, + unsigned char metric, vector< vector > *meaningful_clusters); + void operator()(double *data, unsigned int num, unsigned char method, + vector< vector > *meaningful_clusters); + +private: + /// Helper functions + void build_merge_info(double *dendogram, double *data, int num, int dim, bool use_full_merge_rule, + vector *merge_info, vector< vector > *meaningful_clusters); + void build_merge_info(double *dendogram, int num, vector *merge_info, + vector< vector > *meaningful_clusters); + + /// Number of False Alarms + int nfa(float sigma, int k, int N); + +}; + +void MaxMeaningfulClustering::operator()(double *data, unsigned int num, int dim, unsigned char method, + unsigned char metric, vector< vector > *meaningful_clusters) +{ + + double *Z = (double*)malloc(((num-1)*4) * sizeof(double)); // we need 4 floats foreach sample merge. + if (Z == NULL) + CV_Error(CV_StsNoMem, "Not enough Memory for erGrouping hierarchical clustering structures!"); + + linkage_vector(data, (int)num, dim, Z, method, metric); + + vector merge_info; + build_merge_info(Z, data, (int)num, dim, false, &merge_info, meaningful_clusters); + + free(Z); + merge_info.clear(); +} + +void MaxMeaningfulClustering::operator()(double *data, unsigned int num, unsigned char method, + vector< vector > *meaningful_clusters) +{ + + CV_Assert(method == METHOD_METR_AVERAGE); + + double *Z = (double*)malloc(((num-1)*4) * sizeof(double)); // we need 4 floats foreach sample merge. 
+ if (Z == NULL) + CV_Error(CV_StsNoMem, "Not enough Memory for erGrouping hierarchical clustering structures!"); + + linkage(data, (int)num, Z); + + vector merge_info; + build_merge_info(Z, (int)num, &merge_info, meaningful_clusters); + + free(Z); + merge_info.clear(); +} + +void MaxMeaningfulClustering::build_merge_info(double *Z, double *X, int N, int dim, + bool use_full_merge_rule, + vector *merge_info, + vector< vector > *meaningful_clusters) +{ + + // walk the whole dendogram + for (int i=0; i<(N-1)*4; i=i+4) + { + HCluster cluster; + cluster.num_elem = (int)Z[i+3]; //number of elements + + int node1 = (int)Z[i]; + int node2 = (int)Z[i+1]; + float dist = (float)Z[i+2]; + + if (node1 point; + for (int n=0; nat(node1-N).points.size(); ii++) + { + cluster.points.push_back(merge_info->at(node1-N).points[ii]); + cluster.elements.push_back(merge_info->at(node1-N).elements[ii]); + } + //update the extended volume of node1 using the dist where this cluster merge with another + merge_info->at(node1-N).dist_ext = dist; + } + if (node2 point; + for (int n=0; nat(node2-N).points.size(); ii++) + { + cluster.points.push_back(merge_info->at(node2-N).points[ii]); + cluster.elements.push_back(merge_info->at(node2-N).elements[ii]); + } + + //update the extended volume of node2 using the dist where this cluster merge with another + merge_info->at(node2-N).dist_ext = dist; + } + + Minibox mb; + for (int ii=0; ii<(int)cluster.points.size(); ii++) + { + mb.check_in(&cluster.points.at(ii)); + } + + cluster.dist = dist; + cluster.volume = mb.volume(); + if (cluster.volume >= 1) + cluster.volume = 0.999999; + if (cluster.volume == 0) + cluster.volume = 0.001; + + cluster.volume_ext=1; + + if (node1>=N) + { + merge_info->at(node1-N).volume_ext = cluster.volume; + } + if (node2>=N) + { + merge_info->at(node2-N).volume_ext = cluster.volume; + } + + cluster.node1 = node1; + cluster.node2 = node2; + + merge_info->push_back(cluster); + + } + + for (int i=0; i<(int)merge_info->size(); i++) + { + + merge_info->at(i).nfa = nfa((float)merge_info->at(i).volume, + merge_info->at(i).num_elem, N); + int node1 = merge_info->at(i).node1; + int node2 = merge_info->at(i).node2; + + if ((node1at(i).max_meaningful = true; + merge_info->at(i).max_in_branch.push_back(i); + merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; + } else { + if ((node1>=N)&&(node2>=N)) + { + // both nodes are "sets" : we must evaluate the merging condition + if ( ( (use_full_merge_rule) && + ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + merge_info->at(node2-N).nfa) && + (merge_info->at(i).nfa < min(merge_info->at(node1-N).min_nfa_in_branch, + merge_info->at(node2-N).min_nfa_in_branch))) ) || + ( (!use_full_merge_rule) && + ((merge_info->at(i).nfa < min(merge_info->at(node1-N).min_nfa_in_branch, + merge_info->at(node2-N).min_nfa_in_branch))) ) ) + { + merge_info->at(i).max_meaningful = true; + merge_info->at(i).max_in_branch.push_back(i); + merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; + for (int k =0; k<(int)merge_info->at(node1-N).max_in_branch.size(); k++) + merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; + for (int k =0; k<(int)merge_info->at(node2-N).max_in_branch.size(); k++) + merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; + } else { + merge_info->at(i).max_meaningful = false; + merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(), + merge_info->at(node1-N).max_in_branch.begin(), + 
merge_info->at(node1-N).max_in_branch.end()); + merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(), + merge_info->at(node2-N).max_in_branch.begin(), + merge_info->at(node2-N).max_in_branch.end()); + + if (merge_info->at(i).nfa < min(merge_info->at(node1-N).min_nfa_in_branch, + merge_info->at(node2-N).min_nfa_in_branch)) + + merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; + else + merge_info->at(i).min_nfa_in_branch = min(merge_info->at(node1-N).min_nfa_in_branch, + merge_info->at(node2-N).min_nfa_in_branch); + } + } else { + + //one of the nodes is a "set" and the other is an individual sample : check merging condition + if (node1>=N) + { + if ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + 1) && + (merge_info->at(i).nfaat(node1-N).min_nfa_in_branch)) + { + merge_info->at(i).max_meaningful = true; + merge_info->at(i).max_in_branch.push_back(i); + merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; + for (int k =0; k<(int)merge_info->at(node1-N).max_in_branch.size(); k++) + merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; + } else { + merge_info->at(i).max_meaningful = false; + merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(), + merge_info->at(node1-N).max_in_branch.begin(), + merge_info->at(node1-N).max_in_branch.end()); + merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa, + merge_info->at(node1-N).min_nfa_in_branch); + } + } else { + if ((merge_info->at(i).nfa < merge_info->at(node2-N).nfa + 1) && + (merge_info->at(i).nfaat(node2-N).min_nfa_in_branch)) + { + merge_info->at(i).max_meaningful = true; + merge_info->at(i).max_in_branch.push_back(i); + merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; + for (int k =0; k<(int)merge_info->at(node2-N).max_in_branch.size(); k++) + merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; + } else { + merge_info->at(i).max_meaningful = false; + merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(), + merge_info->at(node2-N).max_in_branch.begin(), + merge_info->at(node2-N).max_in_branch.end()); + merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa, + merge_info->at(node2-N).min_nfa_in_branch); + } + } + } + } + } + + for (int i=0; i<(int)merge_info->size(); i++) + { + if (merge_info->at(i).max_meaningful) + { + vector cluster; + for (int k=0; k<(int)merge_info->at(i).elements.size();k++) + cluster.push_back(merge_info->at(i).elements.at(k)); + meaningful_clusters->push_back(cluster); + } + } + +} + +void MaxMeaningfulClustering::build_merge_info(double *Z, int N, vector *merge_info, + vector< vector > *meaningful_clusters) +{ + + // walk the whole dendogram + for (int i=0; i<(N-1)*4; i=i+4) + { + HCluster cluster; + cluster.num_elem = (int)Z[i+3]; //number of elements + + int node1 = (int)Z[i]; + int node2 = (int)Z[i+1]; + float dist = (float)Z[i+2]; + if (dist != dist) //this is to avoid NaN values + dist=0; + + if (node1at(node1-N).elements.size(); ii++) + { + cluster.elements.push_back(merge_info->at(node1-N).elements[ii]); + } + } + if (node2at(node2-N).elements.size(); ii++) + { + cluster.elements.push_back(merge_info->at(node2-N).elements[ii]); + } + } + + cluster.dist = dist; + if (cluster.dist >= 1) + cluster.dist = 0.999999; + if (cluster.dist == 0) + cluster.dist = 1.e-25; + + cluster.dist_ext = 1; + + if (node1>=N) + { + merge_info->at(node1-N).dist_ext = cluster.dist; + } + if (node2>=N) + { + merge_info->at(node2-N).dist_ext = 
cluster.dist; + } + + cluster.node1 = node1; + cluster.node2 = node2; + + merge_info->push_back(cluster); + } + + for (int i=0; i<(int)merge_info->size(); i++) + { + + merge_info->at(i).nfa = nfa(merge_info->at(i).dist, + merge_info->at(i).num_elem, N); + int node1 = merge_info->at(i).node1; + int node2 = merge_info->at(i).node2; + + if ((node1at(i).max_meaningful = true; + merge_info->at(i).max_in_branch.push_back(i); + merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; + } else { + if ((node1>=N)&&(node2>=N)) + { + // both nodes are "sets" so we must evaluate the merging condition + if ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + merge_info->at(node2-N).nfa) && + (merge_info->at(i).nfa < min(merge_info->at(node1-N).min_nfa_in_branch, + merge_info->at(node2-N).min_nfa_in_branch))) + { + merge_info->at(i).max_meaningful = true; + merge_info->at(i).max_in_branch.push_back(i); + merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; + for (int k =0; k<(int)merge_info->at(node1-N).max_in_branch.size(); k++) + merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; + for (int k =0; k<(int)merge_info->at(node2-N).max_in_branch.size(); k++) + merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; + } else { + merge_info->at(i).max_meaningful = false; + merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(), + merge_info->at(node1-N).max_in_branch.begin(), + merge_info->at(node1-N).max_in_branch.end()); + merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(), + merge_info->at(node2-N).max_in_branch.begin(), + merge_info->at(node2-N).max_in_branch.end()); + if (merge_info->at(i).nfa < min(merge_info->at(node1-N).min_nfa_in_branch, + merge_info->at(node2-N).min_nfa_in_branch)) + merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; + else + merge_info->at(i).min_nfa_in_branch = min(merge_info->at(node1-N).min_nfa_in_branch, + merge_info->at(node2-N).min_nfa_in_branch); + } + + } else { + + // one node is a "set" and the other is an indivisual sample: check merging condition + if (node1>=N) + { + if ((merge_info->at(i).nfa < merge_info->at(node1-N).nfa + 1) && + (merge_info->at(i).nfaat(node1-N).min_nfa_in_branch)) + { + merge_info->at(i).max_meaningful = true; + merge_info->at(i).max_in_branch.push_back(i); + merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; + + for (int k =0; k<(int)merge_info->at(node1-N).max_in_branch.size(); k++) + merge_info->at(merge_info->at(node1-N).max_in_branch.at(k)).max_meaningful = false; + + } else { + merge_info->at(i).max_meaningful = false; + merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(), + merge_info->at(node1-N).max_in_branch.begin(), + merge_info->at(node1-N).max_in_branch.end()); + merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa, + merge_info->at(node1-N).min_nfa_in_branch); + } + } else { + if ((merge_info->at(i).nfa < merge_info->at(node2-N).nfa + 1) && + (merge_info->at(i).nfaat(node2-N).min_nfa_in_branch)) + { + merge_info->at(i).max_meaningful = true; + merge_info->at(i).max_in_branch.push_back(i); + merge_info->at(i).min_nfa_in_branch = merge_info->at(i).nfa; + for (int k =0; k<(int)merge_info->at(node2-N).max_in_branch.size(); k++) + merge_info->at(merge_info->at(node2-N).max_in_branch.at(k)).max_meaningful = false; + } else { + merge_info->at(i).max_meaningful = false; + merge_info->at(i).max_in_branch.insert(merge_info->at(i).max_in_branch.end(), + 
merge_info->at(node2-N).max_in_branch.begin(), + merge_info->at(node2-N).max_in_branch.end()); + merge_info->at(i).min_nfa_in_branch = min(merge_info->at(i).nfa, + merge_info->at(node2-N).min_nfa_in_branch); + } + } + } + } + } + + for (int i=0; i<(int)merge_info->size(); i++) + { + if (merge_info->at(i).max_meaningful) + { + vector cluster; + for (int k=0; k<(int)merge_info->at(i).elements.size();k++) + cluster.push_back(merge_info->at(i).elements.at(k)); + meaningful_clusters->push_back(cluster); + } + } + +} + +int MaxMeaningfulClustering::nfa(float sigma, int k, int N) +{ + // use an approximation for the nfa calculations (faster) + return -1*(int)NFA( N, k, (double) sigma, 0); +} + +void accumulate_evidence(vector > *meaningful_clusters, int grow, Mat *co_occurrence); + +void accumulate_evidence(vector > *meaningful_clusters, int grow, Mat *co_occurrence) +{ + for (int k=0; k<(int)meaningful_clusters->size(); k++) + for (int i=0; i<(int)meaningful_clusters->at(k).size(); i++) + for (int j=i; j<(int)meaningful_clusters->at(k).size(); j++) + if (meaningful_clusters->at(k).at(i) != meaningful_clusters->at(k).at(j)) + { + co_occurrence->at(meaningful_clusters->at(k).at(i), meaningful_clusters->at(k).at(j)) += grow; + co_occurrence->at(meaningful_clusters->at(k).at(j), meaningful_clusters->at(k).at(i)) += grow; + } +} + +// ERFeatures structure stores additional features for a given ERStat instance +struct ERFeatures +{ + int area; + Point center; + Rect rect; + float intensity_mean; ///< mean intensity of the whole region + float intensity_std; ///< intensity standard deviation of the whole region + float boundary_intensity_mean; ///< mean intensity of the boundary of the region + float boundary_intensity_std; ///< intensity standard deviation of the boundary of the region + double stroke_mean; ///< mean stroke width approximation of the whole region + double stroke_std; ///< stroke standard deviation of the whole region + double gradient_mean; ///< mean gradient magnitude of the whole region + double gradient_std; ///< gradient magnitude standard deviation of the whole region +}; + +float extract_features(InputOutputArray src, vector ®ions, vector &features); +void ergrouping(InputOutputArray src, vector ®ions); + +float extract_features(InputOutputArray src, vector ®ions, vector &features) +{ + // assert correct image type + CV_Assert( (src.type() == CV_8UC1) || (src.type() == CV_8UC3) ); + + CV_Assert( !regions.empty() ); + CV_Assert( features.empty() ); + + Mat grey = src.getMat(); + + Mat gradient_magnitude = Mat_(grey.size()); + get_gradient_magnitude( grey, gradient_magnitude); + + Mat region_mask = Mat::zeros(grey.rows+2, grey.cols+2, CV_8UC1); + + float max_stroke = 0; + + for (int r=0; r<(int)regions.size(); r++) + { + ERFeatures f; + ERStat *stat = ®ions.at(r); + + f.area = stat->area; + f.rect = stat->rect; + f.center = Point(f.rect.x+(f.rect.width/2),f.rect.y+(f.rect.height/2)); + + if (regions.at(r).parent != NULL) + { + + //Fill the region and calculate features + Mat region = region_mask(Rect(Point(stat->rect.x,stat->rect.y), + Point(stat->rect.br().x+2,stat->rect.br().y+2))); + region = Scalar(0); + int newMaskVal = 255; + int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY; + Rect rect; + + floodFill( grey(Rect(Point(stat->rect.x,stat->rect.y),Point(stat->rect.br().x,stat->rect.br().y))), + region, Point(stat->pixel%grey.cols - stat->rect.x, stat->pixel/grey.cols - stat->rect.y), + Scalar(255), &rect, Scalar(stat->level), Scalar(0), flags ); + 
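+            // (FLOODFILL_MASK_ONLY paints the region's pixels into region_mask
+            //  instead of modifying the image; region_mask carries a 1-pixel
+            //  border, hence the +2/+1 adjustments just below.)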
rect.width += 2;
+            rect.height += 2;
+            Mat rect_mask = region_mask(Rect(stat->rect.x+1,stat->rect.y+1,stat->rect.width,stat->rect.height));
+
+
+            Scalar mean,std;
+            meanStdDev( grey(stat->rect), mean, std, rect_mask);
+            f.intensity_mean = (float)mean[0];
+            f.intensity_std  = (float)std[0];
+
+            Mat tmp;
+            distanceTransform(rect_mask, tmp, DIST_L1,3);
+
+            meanStdDev(tmp,mean,std,rect_mask);
+            f.stroke_mean = mean[0];
+            f.stroke_std  = std[0];
+
+            if (f.stroke_mean > max_stroke)
+                max_stroke = (float)f.stroke_mean;
+
+            Mat element = getStructuringElement( MORPH_RECT, Size(5, 5), Point(2, 2) );
+            dilate(rect_mask, tmp, element);
+            absdiff(tmp, rect_mask, tmp);
+
+            meanStdDev( grey(stat->rect), mean, std, tmp);
+            f.boundary_intensity_mean = (float)mean[0];
+            f.boundary_intensity_std  = (float)std[0];
+
+            Mat tmp2;
+            dilate(rect_mask, tmp, element);
+            erode (rect_mask, tmp2, element);
+            absdiff(tmp, tmp2, tmp);
+
+            meanStdDev( gradient_magnitude(stat->rect), mean, std, tmp);
+            f.gradient_mean = mean[0];
+            f.gradient_std  = std[0];
+
+            rect_mask = Scalar(0);
+
+        } else {
+
+            f.intensity_mean = 0;
+            f.intensity_std  = 0;
+
+            f.stroke_mean = 0;
+            f.stroke_std  = 0;
+
+            f.boundary_intensity_mean = 0;
+            f.boundary_intensity_std  = 0;
+
+            f.gradient_mean = 0;
+            f.gradient_std  = 0;
+        }
+
+        features.push_back(f);
+    }
+
+    return max_stroke;
+}
+
+/*!
+    Find groups of Extremal Regions that are organized as text blocks. This function implements
+    the grouping algorithm described in:
+    Gomez L. and Karatzas D.: Multi-script Text Extraction from Natural Scenes, ICDAR 2013.
+    Notice that this implementation constrains the results to horizontally-aligned text and
+    Latin script (since the ERFilter default classifiers are trained only for Latin script detection).
+
+    The algorithm combines two different clustering techniques in a single parameter-free procedure
+    to detect groups of regions organized as text. The maximally meaningful groups are first detected
+    in several feature spaces, where each feature space is a combination of proximity information
+    (x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.),
+    thus providing a set of text group hypotheses. An Evidence Accumulation framework is then used
+    to combine all these hypotheses into the final estimate. Each resulting group is finally
+    validated heuristically in order to assess whether it forms a valid horizontally-aligned text block.
+
+    \param src Vector of single channel images CV_8UC1 from which the regions were extracted.
+    \param regions Vector of ERs retrieved from the ERFilter algorithm from each channel.
+    \param groups The output of the algorithm is stored in this parameter as a list of rectangles.
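+
+    A minimal usage sketch (the input path and classifier file names here are
+    just examples; they match the defaults used in samples/cpp/erfilter.cpp):
+
+    \code
+    Mat image = imread("scene.jpg");
+    vector<Mat> channels;
+    computeNMChannels(image, channels);
+
+    Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"));
+    Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"));
+
+    vector<vector<ERStat> > regions(channels.size());
+    for (size_t c = 0; c < channels.size(); c++)
+    {
+        er_filter1->run(channels[c], regions[c]);
+        er_filter2->run(channels[c], regions[c]);
+    }
+
+    vector<Rect> groups;
+    erGrouping(channels, regions, groups);
+    \endcode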
+*/ +void erGrouping(InputArrayOfArrays _src, vector > ®ions, std::vector &text_boxes) +{ + // TODO assert correct vector + + std::vector src; + _src.getMatVector(src); + + CV_Assert ( !src.empty() ); + CV_Assert ( src.size() == regions.size() ); + + if (!text_boxes.empty()) + { + text_boxes.clear(); + } + + for (int c=0; c<(int)src.size(); c++) + { + Mat img = src.at(c); + + // assert correct image type + CV_Assert( img.type() == CV_8UC1 ); + + CV_Assert( !regions.at(c).empty() ); + + if ( regions.at(c).size() < 3 ) + continue; + + + std::vector > meaningful_clusters; + vector features; + float max_stroke = extract_features(img,regions.at(c),features); + + MaxMeaningfulClustering mm_clustering(METHOD_METR_SINGLE, METRIC_SEUCLIDEAN); + + Mat co_occurrence_matrix = Mat::zeros((int)regions.at(c).size(), (int)regions.at(c).size(), CV_64F); + + int num_features = MAX_NUM_FEATURES; + + // Find the Max. Meaningful Clusters in each feature space independently + int dims[MAX_NUM_FEATURES] = {3,3,3,3,3,3,3,3,3}; + + for (int f=0; f(i, j); + pos++; + } + } + + // Find the Max. Meaningful Clusters in the co-occurrence matrix + mm_clustering(D, regions.at(c).size(), METHOD_METR_AVERAGE, &meaningful_clusters); + free(D); + + + /* --------------------------------- Groups Validation --------------------------------*/ + /* Examine each of the clusters in order to assest if they are valid text lines or not */ + /* ------------------------------------------------------------------------------------*/ + + // remove non-horizontally-aligned groups + vector groups_rects; + for (int i=(int)meaningful_clusters.size()-1; i>=0; i--) + { + + Rect group_rect; + float sumx=0, sumy=0, sumxy=0, sumx2=0; + + for (int j=0; j<(int)meaningful_clusters.at(i).size();j++) + { + if (j==0) + { + group_rect = regions.at(c).at(meaningful_clusters.at(i).at(j)).rect; + } else { + group_rect = group_rect | regions.at(c).at(meaningful_clusters.at(i).at(j)).rect; + } + + sumx += regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.x + + regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.width/2; + sumy += regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.y + + regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.height/2; + sumxy += (regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.x + + regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.width/2)* + (regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.y + + regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.height/2); + sumx2 += (regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.x + + regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.width/2)* + (regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.x + + regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.width/2); + } + // line coefficients + //float a0=(sumy*sumx2-sumx*sumxy)/((int)meaningful_clusters.at(i).size()*sumx2-sumx*sumx); + float a1=((int)meaningful_clusters.at(i).size()*sumxy-sumx*sumy) / + ((int)meaningful_clusters.at(i).size()*sumx2-sumx*sumx); + + if (abs(a1) > 0.13) + meaningful_clusters.erase(meaningful_clusters.begin()+i); + else + groups_rects.insert(groups_rects.begin(), group_rect); + + } + + //TODO if group has less than 5 chars we can infer that is a single line so an additional rule can + // be added here in order to reject non-colinear small groups + + + /* TODO this is better code for detecting non-horizontally-aligned groups but only works for + * single lines so we need here a way to split the rejected groups into several line hypothesis + * and recursively 
apply the text line validation test until no more splits can be done + + vector groups_rects; + + for (int i=meaningful_clusters.size()-1; i>=0; i--) + { + + Rect group_rect; + + for (int j=0; j<(int)meaningful_clusters.at(i).size();j++) + { + if (j==0) + { + group_rect = regions.at(c).at(meaningful_clusters.at(i).at(j)).rect; + } else { + group_rect = group_rect | regions.at(c).at(meaningful_clusters.at(i).at(j)).rect; + } + } + + float group_y_center = 0; + for (int j=0; j<(int)meaningful_clusters.at(i).size();j++) + { + int region_y_center = regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.y + + regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.height/2; + group_y_center += region_y_center; + } + group_y_center = group_y_center / (int)meaningful_clusters.at(i).size(); + + float err = 0; + for (int j=0; j<(int)meaningful_clusters.at(i).size();j++) + { + int region_y_center = regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.y + + regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.height/2; + err += pow(group_y_center-region_y_center, 2); + } + err = sqrt(err / (int)meaningful_clusters.at(i).size()); + err = err / group_rect.height; + + if (err > 0.17) + meaningful_clusters.erase(meaningful_clusters.begin()+i); + else + groups_rects.insert(groups_rects.begin(), group_rect); + + } */ + + // check for colinear groups that can be merged + for (int i=0; i<(int)meaningful_clusters.size(); i++) + { + int ay1 = groups_rects.at(i).y; + int ay2 = groups_rects.at(i).y + groups_rects.at(i).height; + int ax1 = groups_rects.at(i).x; + int ax2 = groups_rects.at(i).x + groups_rects.at(i).width; + for (int j=(int)meaningful_clusters.size()-1; j>i; j--) + { + int by1 = groups_rects.at(j).y; + int by2 = groups_rects.at(j).y + groups_rects.at(j).height; + int bx1 = groups_rects.at(j).x; + int bx2 = groups_rects.at(j).x + groups_rects.at(j).width; + + int y_intersection = min(ay2,by2) - max(ay1,by1); + + if (y_intersection > 0.75*(max(groups_rects.at(i).height,groups_rects.at(j).height))) + { + int xdist = min(abs(ax2-bx1),abs(bx2-ax1)); + if (xdist < 0.75*(max(groups_rects.at(i).height,groups_rects.at(j).height))) + { + for (int r=0; r<(int)meaningful_clusters.at(j).size(); r++) + meaningful_clusters.at(i).push_back(meaningful_clusters.at(j).at(r)); + meaningful_clusters.erase(meaningful_clusters.begin()+j); + groups_rects.erase(groups_rects.begin()+j); + } + } + + } + } + + // remove groups with less than 3 non-overlapping regions + for (int i=(int)meaningful_clusters.size()-1; i>=0; i--) + { + + double group_probability_mean = 0; + Rect group_rect; + vector individual_components; + + for (int j=0; j<(int)meaningful_clusters.at(i).size();j++) + { + group_probability_mean += regions.at(c).at(meaningful_clusters.at(i).at(j)).probability; + + if (j==0) + { + group_rect = features.at(meaningful_clusters.at(i).at(j)).rect; + individual_components.push_back(group_rect); + } else { + bool matched = false; + for (int k=0; k<(int)individual_components.size(); k++) + { + Rect intersection = individual_components.at(k) & + features.at(meaningful_clusters.at(i).at(j)).rect; + + if ((intersection == features.at(meaningful_clusters.at(i).at(j)).rect) || + (intersection == individual_components.at(k))) + { + individual_components.at(k) = individual_components.at(k) | + features.at(meaningful_clusters.at(i).at(j)).rect; + matched = true; + } + } + + if (!matched) + individual_components.push_back(features.at(meaningful_clusters.at(i).at(j)).rect); + + group_rect = group_rect | 
features.at(meaningful_clusters.at(i).at(j)).rect; + } + } + group_probability_mean = group_probability_mean / meaningful_clusters.at(i).size(); + + + if (individual_components.size()<3) // || (group_probability_mean < 0.5) + { + meaningful_clusters.erase(meaningful_clusters.begin()+i); + groups_rects.erase(groups_rects.begin()+i); + continue; + } + + } + + // TODO remove groups with high height variability + + // Try to remove groups with repetitive patterns + for (int i=(int)meaningful_clusters.size()-1; i>=0; i--) + { + Mat grey = img; + Mat sad = Mat::zeros(regions.at(c).at(meaningful_clusters.at(i).at(0)).rect.size() , CV_8UC1); + Mat region_mask = Mat::zeros(grey.rows+2, grey.cols+2, CV_8UC1); + float sad_value = 0; + Mat ratios = Mat::zeros(1, (int)meaningful_clusters.at(i).size(), CV_32FC1); + Mat holes = Mat::zeros(1, (int)meaningful_clusters.at(i).size(), CV_32FC1); + + for (int r=0; r<(int)meaningful_clusters.at(i).size(); r++) + { + ERStat *stat = ®ions.at(c).at(meaningful_clusters.at(i).at(r)); + + //Fill the region + Mat region = region_mask(Rect(Point(stat->rect.x,stat->rect.y), + Point(stat->rect.br().x+2,stat->rect.br().y+2))); + region = Scalar(0); + int newMaskVal = 255; + int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY; + Rect rect; + + floodFill( grey(Rect(Point(stat->rect.x,stat->rect.y),Point(stat->rect.br().x,stat->rect.br().y))), + region, Point(stat->pixel%grey.cols - stat->rect.x, stat->pixel/grey.cols - stat->rect.y), + Scalar(255), &rect, Scalar(stat->level), Scalar(0), flags ); + + Mat mask = Mat::zeros(regions.at(c).at(meaningful_clusters.at(i).at(0)).rect.size() , CV_8UC1); + resize(region, mask, mask.size()); + mask = mask - 254; + if (r!=0) + { + // using Sum of Absolute Differences + absdiff(sad, mask, sad); + Scalar s = sum(sad); + sad_value += (float)s[0]/(sad.rows*sad.cols); + } + mask.copyTo(sad); + ratios.at(0,r) = (float)min(stat->rect.width, stat->rect.height) / + max(stat->rect.width, stat->rect.height); + holes.at(0,r) = (float)stat->hole_area_ratio; + + } + Scalar mean,std; + meanStdDev( holes, mean, std); + float holes_mean = (float)mean[0]; + meanStdDev( ratios, mean, std); + + // Set empirically + if (((float)sad_value / ((int)meaningful_clusters.at(i).size()-1) < 0.12) || + (((float)sad_value / ((int)meaningful_clusters.at(i).size()-1) < 0.175)&&(holes_mean < 0.015))|| + //TODO this must be num of non-overlapping regions.at(c) and probably 7 is ok! 
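+             // (a low average SAD between the normalized region masks means the
+             //  shapes are nearly identical, which suggests a repetitive pattern
+             //  rather than text; the thresholds here were set empirically.)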
+ ((holes_mean < 0.005)&&((int)meaningful_clusters.at(i).size()>10))) + { + meaningful_clusters.erase(meaningful_clusters.begin()+i); + groups_rects.erase(groups_rects.begin()+i); + } + } + + // remove small groups inside others + vector groups_to_remove; + for (int i=0; i<(int)meaningful_clusters.size()-1; i++) + { + for (int j=i+1; j<(int)meaningful_clusters.size(); j++) + { + + Rect intersection = groups_rects.at(i) & groups_rects.at(j); + + if (intersection == groups_rects.at(i)) + groups_to_remove.push_back(i); + if (intersection == groups_rects.at(j)) + groups_to_remove.push_back(j); + + } + } + + if (!groups_to_remove.empty()) + { + int last_removed = -1; + std::sort(groups_to_remove.begin(), groups_to_remove.end()); + for (int i=(int)groups_to_remove.size()-1; i>=0; i--) + { + if (groups_to_remove.at(i) == last_removed) + continue; + else + last_removed = groups_to_remove.at(i); + + meaningful_clusters.erase(meaningful_clusters.begin()+groups_to_remove.at(i)); + groups_rects.erase(groups_rects.begin()+groups_to_remove.at(i)); + } + } + groups_to_remove.clear(); + + for (int i=0; i<(int)groups_rects.size(); i++) + text_boxes.push_back(groups_rects.at(i)); + } + + // check for colinear groups that can be merged + for (int i=0; i<(int)text_boxes.size(); i++) + { + int ay1 = text_boxes.at(i).y; + int ay2 = text_boxes.at(i).y + text_boxes.at(i).height; + int ax1 = text_boxes.at(i).x; + int ax2 = text_boxes.at(i).x + text_boxes.at(i).width; + for (int j=(int)text_boxes.size()-1; j>i; j--) + { + int by1 = text_boxes.at(j).y; + int by2 = text_boxes.at(j).y + text_boxes.at(j).height; + int bx1 = text_boxes.at(j).x; + int bx2 = text_boxes.at(j).x + text_boxes.at(j).width; + + int y_intersection = min(ay2,by2) - max(ay1,by1); + + if (y_intersection > 0.6*(max(text_boxes.at(i).height,text_boxes.at(j).height))) + { + int xdist = min(abs(ax2-bx1),abs(bx2-ax1)); + Rect intersection = text_boxes.at(i) & text_boxes.at(j); + if ( (xdist < 0.75*(max(text_boxes.at(i).height,text_boxes.at(j).height))) || + (intersection.width != 0)) + { + text_boxes.at(i) = text_boxes.at(i) | text_boxes.at(j); + text_boxes.erase(text_boxes.begin()+j); + } + } + + } + } + +} } diff --git a/samples/cpp/erfilter.cpp b/samples/cpp/erfilter.cpp index 69009b8..389cb48 100644 --- a/samples/cpp/erfilter.cpp +++ b/samples/cpp/erfilter.cpp @@ -16,105 +16,90 @@ using namespace std; using namespace cv; -void er_draw(Mat &src, Mat &dst, ERStat& er); +void show_help_and_exit(const char *cmd); +void groups_draw(Mat &src, vector &groups); +void er_draw(Mat &src, Mat &dst, ERStat& er); -void er_draw(Mat &src, Mat &dst, ERStat& er) +int main(int argc, const char * argv[]) { - if (er.parent != NULL) // deprecate the root region - { - int newMaskVal = 255; - int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY; - floodFill(src,dst,Point(er.pixel%src.cols,er.pixel/src.cols),Scalar(255),0,Scalar(er.level),Scalar(0),flags); - } + if (argc < 2) show_help_and_exit(argv[0]); -} - -int main(int argc, const char * argv[]) -{ + Mat src = imread(argv[1]); + // Extract channels to be processed individually + vector channels; + computeNMChannels(src, channels); - vector regions; + int cn = (int)channels.size(); + // Append negative channels to detect ER- (bright regions over dark background) + for (int c = 0; c < cn-1; c++) + channels.push_back(255-channels[c]); - if (argc < 2) { - cout << "Demo program of the Extremal Region Filter algorithm described in " << endl; - cout << "Neumann L., Matas J.: Real-Time Scene Text 
Localization and Recognition, CVPR 2012" << endl << endl; - cout << " Usage: " << argv[0] << " input_image " << endl; - cout << " Default classifier files (trained_classifierNM*.xml) should be in ./" << endl; - return -1; - } + // Create ERFilter objects with the 1st and 2nd stage default classifiers + Ptr er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),8,0.00025,0.13,0.4,true,0.1); + Ptr er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.3); - Mat original = imread(argv[1]); - Mat gt; - if (argc > 2) + vector > regions(channels.size()); + // Apply the default cascade classifier to each independent channel (could be done in parallel) + for (int c=0; c<(int)channels.size(); c++) { - gt = imread(argv[2]); - cvtColor(gt, gt, COLOR_RGB2GRAY); - threshold(gt, gt, 254, 255, THRESH_BINARY); + er_filter1->run(channels[c], regions[c]); + er_filter2->run(channels[c], regions[c]); } - Mat grey(original.size(),CV_8UC1); - cvtColor(original,grey,COLOR_RGB2GRAY); - - double t = (double)getTickCount(); - // Build ER tree and filter with the 1st stage default classifier - Ptr er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml")); + // Detect character groups + vector groups; + erGrouping(channels, regions, groups); - er_filter1->run(grey, regions); - - t = (double)getTickCount() - t; - cout << " --------------------------------------------------------------------------------------------------" << endl; - cout << "\t FIRST STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl; - cout << " --------------------------------------------------------------------------------------------------" << endl; - cout << setw(9) << regions.size()+er_filter1->getNumRejected() << "\t Extremal Regions extracted " << endl; - cout << setw(9) << regions.size() << "\t Extremal Regions selected by the first stage of the sequential classifier." << endl; - cout << "\t \t (saving into out_second_stage.jpg)" << endl; - cout << " --------------------------------------------------------------------------------------------------" << endl; + // draw groups + groups_draw(src, groups); + imshow("grouping",src); + waitKey(-1); + // memory clean-up er_filter1.release(); - - // draw regions - Mat mask = Mat::zeros(grey.rows+2,grey.cols+2,CV_8UC1); - for (int r=0; r<(int)regions.size(); r++) - er_draw(grey, mask, regions.at(r)); - mask = 255-mask; - imwrite("out_first_stage.jpg", mask); - - if (argc > 2) + er_filter2.release(); + regions.clear(); + if (!groups.empty()) { - Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2)))); - cout << "Recall for the 1st stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl; + groups.clear(); } +} - t = (double)getTickCount(); - // Default second stage classifier - Ptr er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml")); - er_filter2->run(grey, regions); - t = (double)getTickCount() - t; - cout << " --------------------------------------------------------------------------------------------------" << endl; - cout << "\t SECOND STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl; - cout << " --------------------------------------------------------------------------------------------------" << endl; - cout << setw(9) << regions.size() << "\t Extremal Regions selected by the second stage of the sequential classifier." 
<< endl; - cout << "\t \t (saving into out_second_stage.jpg)" << endl; - cout << " --------------------------------------------------------------------------------------------------" << endl; +// helper functions - er_filter2.release(); - - // draw regions - mask = mask*0; - for (int r=0; r<(int)regions.size(); r++) - er_draw(grey, mask, regions.at(r)); - mask = 255-mask; - imwrite("out_second_stage.jpg", mask); +void show_help_and_exit(const char *cmd) +{ + cout << endl << cmd << endl << endl; + cout << "Demo program of the Extremal Region Filter algorithm described in " << endl; + cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl; + cout << " Usage: " << cmd << " " << endl; + cout << " Default classifier files (trained_classifierNM*.xml) must be in current directory" << endl << endl; + exit(-1); +} - if (argc > 2) +void groups_draw(Mat &src, vector &groups) +{ + for (int i=groups.size()-1; i>=0; i--) { - Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2)))); - cout << "Recall for the 2nd stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl; + if (src.type() == CV_8UC3) + rectangle(src,groups.at(i).tl(),groups.at(i).br(),Scalar( 0, 255, 255 ), 3, 8 ); + else + rectangle(src,groups.at(i).tl(),groups.at(i).br(),Scalar( 255 ), 3, 8 ); } +} - regions.clear(); +void er_draw(Mat &src, Mat &dst, ERStat& er) +{ + + if (er.parent != NULL) // deprecate the root region + { + int newMaskVal = 255; + int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY; + floodFill(src,dst,Point(er.pixel%src.cols,er.pixel/src.cols),Scalar(255),0,Scalar(er.level),Scalar(0),flags); + } }