make StringSimilarityMatcher instantiation cheap

author Oswald Buddenhagen <oswald.buddenhagen@nokia.com>

Fri, 7 Sep 2012 17:08:42 +0000 (19:08 +0200)

committer The Qt Project <gerrit-noreply@qt-project.org>

Thu, 20 Sep 2012 12:35:09 +0000 (14:35 +0200)
author Oswald Buddenhagen <oswald.buddenhagen@nokia.com>
Fri, 7 Sep 2012 17:08:42 +0000 (19:08 +0200)
committer The Qt Project <gerrit-noreply@qt-project.org>
Thu, 20 Sep 2012 12:35:09 +0000 (14:35 +0200)
diff --git a/src/linguist/shared/simtexth.cpp b/src/linguist/shared/simtexth.cpp

index 479e7c6..902d776 100644 (file)
--- a/src/linguist/shared/simtexth.cpp
+++ b/src/linguist/shared/simtexth.cpp
@@ -132,50 +132,38 @@ static const int bitCount[256] = {
      4,  5,  5,  6,  5,  6,  6,  7,  5,  6,  6,  7,  6,  7,  7,  8
  };
  
-struct CoMatrix
+static inline void setCoOccurence(CoMatrix &m, char c, char d)
  {
+    int k = indexOf[(uchar) c] + 20 * indexOf[(uchar) d];
+    m.b[k >> 3] |= (1 << (k & 0x7));
+}
+
+CoMatrix::CoMatrix(const QString &str)
+{
+    QByteArray ba = str.toUtf8();
+    const char *text = ba.constData();
+    char c = '\0', d;
+    memset( b, 0, 52 );
      /*
-      The matrix has 20 * 20 = 400 entries.  This requires 50 bytes, or 13
-      words.  Some operations are performed on words for more efficiency.
+      The Knuth books are not in the office only for show; they help make
+      loops 30% faster and 20% as readable.
      */
-    union {
-        quint8 b[52];
-        quint32 w[13];
-    };
-
-    CoMatrix() { memset( b, 0, 52 ); }
-
-    CoMatrix(const QString &str)
-    {
-        QByteArray ba = str.toUtf8();
-        const char *text = ba.constData();
-        char c = '\0', d;
-        memset( b, 0, 52 );
-        /*
-          The Knuth books are not in the office only for show; they help make
-          loops 30% faster and 20% as readable.
-        */
-        while ( (d = *text) != '\0' ) {
-            setCoOccurence( c, d );
-            if ( (c = *++text) != '\0' ) {
-                setCoOccurence( d, c );
-                text++;
-            }
+    while ( (d = *text) != '\0' ) {
+        setCoOccurence(*this, c, d);
+        if ( (c = *++text) != '\0' ) {
+            setCoOccurence(*this, d, c);
+            text++;
          }
      }
+}
  
-    void setCoOccurence( char c, char d ) {
-        int k = indexOf[(uchar) c] + 20 * indexOf[(uchar) d];
-        b[k >> 3] |= (1 << (k & 0x7));
-    }
-
-    int worth() const {
-        int w = 0;
-        for ( int i = 0; i < 50; i++ )
-            w += bitCount[b[i]];
-        return w;
-    }
-};
+static inline int worth(const CoMatrix &m)
+{
+    int w = 0;
+    for (int i = 0; i < 50; i++)
+        w += bitCount[m.b[i]];
+    return w;
+}
  
  static inline CoMatrix reunion(const CoMatrix &m, const CoMatrix &n)
  {
@@ -194,8 +182,8 @@ static inline CoMatrix intersection(const CoMatrix &m, const CoMatrix &n)
  }
  
  StringSimilarityMatcher::StringSimilarityMatcher(const QString &stringToMatch)
+    : m_cm(stringToMatch)
  {
-    m_cm = new CoMatrix(stringToMatch);
      m_length = stringToMatch.length();
  }
  
@@ -203,16 +191,11 @@ int StringSimilarityMatcher::getSimilarityScore(const QString &strCandidate)
  {
      CoMatrix cmTarget(strCandidate);
      int delta = qAbs(m_length - strCandidate.size());
-    int score = ( (intersection(*m_cm, cmTarget).worth() + 1) << 10 ) /
-        ( reunion(*m_cm, cmTarget).worth() + (delta << 1) + 1 );
+    int score = ( (worth(intersection(m_cm, cmTarget)) + 1) << 10 ) /
+        ( worth(reunion(m_cm, cmTarget)) + (delta << 1) + 1 );
      return score;
  }
  
-StringSimilarityMatcher::~StringSimilarityMatcher()
-{
-    delete m_cm;
-}
-
  /**
   * Checks how similar two strings are.
   * The return value is the score, and a higher score is more similar
@@ -226,8 +209,8 @@ int getSimilarityScore(const QString &str1, const QString &str2)
      CoMatrix cm(str1);
      int delta = qAbs(str1.size() - str2.size());
  
-    int score = ( (intersection(cm, cmTarget).worth() + 1) << 10 )
-        / ( reunion(cm, cmTarget).worth() + (delta << 1) + 1 );
+    int score = ( (worth(intersection(cm, cmTarget)) + 1) << 10 )
+        / ( worth(reunion(cm, cmTarget)) + (delta << 1) + 1 );
  
      return score;
  }
diff --git a/src/linguist/shared/simtexth.h b/src/linguist/shared/simtexth.h

index 715bf04..7cfe00a 100644 (file)
--- a/src/linguist/shared/simtexth.h
+++ b/src/linguist/shared/simtexth.h
@@ -71,7 +71,21 @@ inline bool operator!=( const Candidate& c, const Candidate& d ) {
  
  typedef QList<Candidate> CandidateList;
  
-struct CoMatrix;
+struct CoMatrix
+{
+    CoMatrix(const QString &str);
+    CoMatrix() {}
+
+    /*
+      The matrix has 20 * 20 = 400 entries.  This requires 50 bytes, or 13
+      words.  Some operations are performed on words for more efficiency.
+    */
+    union {
+        quint8 b[52];
+        quint32 w[13];
+    };
+};
+
  /**
   * This class is more efficient for searching through a large array of candidate strings, since we only
   * have to construct the CoMatrix for the \a stringToMatch once,
@@ -81,11 +95,10 @@ struct CoMatrix;
  class StringSimilarityMatcher {
  public:
      StringSimilarityMatcher(const QString &stringToMatch);
-    ~StringSimilarityMatcher();
      int getSimilarityScore(const QString &strCandidate);
  
  private:
-    CoMatrix *m_cm;
+    CoMatrix m_cm;
      int m_length;
  };
author	Oswald Buddenhagen <oswald.buddenhagen@nokia.com>
	Fri, 7 Sep 2012 17:08:42 +0000 (19:08 +0200)
committer	The Qt Project <gerrit-noreply@qt-project.org>
	Thu, 20 Sep 2012 12:35:09 +0000 (14:35 +0200)
src/linguist/shared/simtexth.cpp		patch \| blob \| history
src/linguist/shared/simtexth.h		patch \| blob \| history