begin to split the fuzzy pinyin options
author: Peng Wu <alexepico@gmail.com>
Wed, 14 Sep 2011 04:18:12 +0000 (12:18 +0800)
committer: Peng Wu <alexepico@gmail.com>
Wed, 14 Sep 2011 04:26:59 +0000 (12:26 +0800)
src/storage/phrase_index.cpp
src/storage/phrase_index.h
src/storage/pinyin_base.cpp
src/storage/pinyin_custom.h
src/storage/pinyin_large_table.cpp

index b433904..27e9095 100644 (file)
@@ -75,10 +75,9 @@ void PhraseItem::increase_pinyin_possibility(PinyinCustomSettings & custom,
            i * ( phrase_length * sizeof(PinyinKey) + sizeof(guint32) );
        guint32 * freq = (guint32 *)(pinyin_begin + phrase_length * sizeof(PinyinKey));
        total_freq += *freq;
-       if ( 0 == pinyin_compare_with_ambiguities(custom,
-                                                 (PinyinKey *)pinyin_begin,
-                                                 pinyin_keys,
-                                                 phrase_length)){
+       if ( 0 == pinyin_compare_with_ambiguities
+             (custom, pinyin_keys,
+              (PinyinKey *)pinyin_begin, phrase_length) ){
            //protect against total_freq overflow.
            if ( delta > 0 && total_freq > total_freq + delta )
                return;
index e1d4de6..c82ed80 100644 (file)
@@ -102,10 +102,9 @@ public:
                i * ( phrase_length * sizeof(PinyinKey) + sizeof(guint32) );
            guint32 * freq = (guint32 *)(pinyin_begin + phrase_length * sizeof(PinyinKey));
            total_freq += *freq;
-           if ( 0 == pinyin_compare_with_ambiguities(custom, 
-                                                     (PinyinKey *)pinyin_begin,
-                                                     pinyin_keys,
-                                                     phrase_length)){
+           if ( 0 == pinyin_compare_with_ambiguities
+                 (custom,  pinyin_keys,
+                  (PinyinKey *)pinyin_begin,phrase_length) ){
                matched += *freq;
            }
        }
index 467bdb3..eb45396 100644 (file)
@@ -1652,57 +1652,71 @@ int pinyin_compare_initial (const PinyinCustomSettings &custom,
                            PinyinInitial lhs,
                            PinyinInitial rhs)
 {
-       if ((lhs == rhs) ||
-               (custom.use_ambiguities [PINYIN_AmbZhiZi] &&
-                ((lhs == PINYIN_Zhi && rhs == PINYIN_Zi) ||
-                 (lhs == PINYIN_Zi && rhs == PINYIN_Zhi))) ||
-                         
-               (custom.use_ambiguities [PINYIN_AmbChiCi] &&
-                ((lhs == PINYIN_Chi && rhs == PINYIN_Ci) ||
-                 (lhs == PINYIN_Ci && rhs == PINYIN_Chi))) ||
-                         
-               (custom.use_ambiguities [PINYIN_AmbShiSi] &&
-                ((lhs == PINYIN_Shi && rhs == PINYIN_Si) ||
-                 (lhs == PINYIN_Si && rhs == PINYIN_Shi))) ||
-
-               (custom.use_ambiguities [PINYIN_AmbLeRi] && 
-                ((lhs == PINYIN_Le && rhs == PINYIN_Ri) ||
-                 (lhs == PINYIN_Ri && rhs == PINYIN_Le))) ||
-
-               (custom.use_ambiguities [PINYIN_AmbNeLe] && 
-                ((lhs == PINYIN_Ne && rhs == PINYIN_Le) ||
-                 (lhs == PINYIN_Le && rhs == PINYIN_Ne))) ||
-
-               (custom.use_ambiguities [PINYIN_AmbFoHe] && 
-                ((lhs == PINYIN_Fo && rhs == PINYIN_He) ||
-                 (lhs == PINYIN_He && rhs == PINYIN_Fo))) ||
-
-               (custom.use_ambiguities [PINYIN_AmbGeKe] &&
-                ((lhs == PINYIN_Ge && rhs == PINYIN_Ke) ||
-                 (lhs == PINYIN_Ke && rhs == PINYIN_Ge)))
-           )
-         return 0;
-       else return (lhs - rhs);
+    if ((lhs == rhs) ||
+
+        (custom.use_ambiguities [PINYIN_AmbCiChi] &&
+         (lhs == PINYIN_Ci && rhs == PINYIN_Chi)) ||
+        (custom.use_ambiguities [PINYIN_AmbChiCi] &&
+         (lhs == PINYIN_Chi && rhs == PINYIN_Ci)) ||
+
+        (custom.use_ambiguities [PINYIN_AmbZiZhi] &&
+         (lhs == PINYIN_Zi && rhs == PINYIN_Zhi)) ||
+        (custom.use_ambiguities [PINYIN_AmbZhiZi] &&
+         (lhs == PINYIN_Zhi && rhs == PINYIN_Zi)) ||
+
+        (custom.use_ambiguities [PINYIN_AmbSiShi] &&
+         (lhs == PINYIN_Si && rhs == PINYIN_Shi)) ||
+        (custom.use_ambiguities [PINYIN_AmbShiSi] &&
+         (lhs == PINYIN_Shi && rhs == PINYIN_Si)) ||
+
+        (custom.use_ambiguities [PINYIN_AmbLeNe] &&
+         (lhs == PINYIN_Le && rhs == PINYIN_Ne)) ||
+        (custom.use_ambiguities [PINYIN_AmbNeLe] &&
+         (lhs == PINYIN_Ne && rhs == PINYIN_Le)) ||
+
+        (custom.use_ambiguities [PINYIN_AmbLeRi] &&
+         (lhs == PINYIN_Le && rhs == PINYIN_Ri)) ||
+        (custom.use_ambiguities [PINYIN_AmbRiLe] &&
+         (lhs == PINYIN_Ri && rhs == PINYIN_Le)) ||
+
+        (custom.use_ambiguities [PINYIN_AmbFoHe] &&
+         (lhs == PINYIN_Fo && rhs == PINYIN_He)) ||
+        (custom.use_ambiguities [PINYIN_AmbHeFo] &&
+         (lhs == PINYIN_He && rhs == PINYIN_Fo)) ||
+
+        (custom.use_ambiguities [PINYIN_AmbGeKe] &&
+         (lhs == PINYIN_Ge && rhs == PINYIN_Ke)) ||
+        (custom.use_ambiguities [PINYIN_AmbKeGe] &&
+         (lhs == PINYIN_Ke && rhs == PINYIN_Ge))
+        )
+        return 0;
+    else return (lhs - rhs);
 }
 
 int pinyin_compare_final (const PinyinCustomSettings &custom,
                          PinyinFinal lhs,
                          PinyinFinal rhs)
 {
-    if(((lhs == rhs) ||
-        (custom.use_ambiguities [PINYIN_AmbAnAng] &&
-         ((lhs == PINYIN_An && rhs == PINYIN_Ang) ||
-          (lhs == PINYIN_Ang && rhs == PINYIN_An))) ||
-              
-        (custom.use_ambiguities [PINYIN_AmbEnEng] &&
-         ((lhs == PINYIN_En && rhs == PINYIN_Eng) ||
-          (lhs == PINYIN_Eng && rhs == PINYIN_En))) ||
-              
-         (custom.use_ambiguities [PINYIN_AmbInIng] &&
-         ((lhs == PINYIN_In && rhs == PINYIN_Ing) ||
-          (lhs == PINYIN_Ing && rhs == PINYIN_In)))))
+    if((lhs == rhs) ||
+
+       (custom.use_ambiguities [PINYIN_AmbAnAng] &&
+        (lhs == PINYIN_An && rhs == PINYIN_Ang)) ||
+       (custom.use_ambiguities [PINYIN_AmbAngAn] &&
+        (lhs == PINYIN_Ang && rhs == PINYIN_An)) ||
+
+       (custom.use_ambiguities [PINYIN_AmbEnEng] &&
+        (lhs == PINYIN_En && rhs == PINYIN_Eng)) ||
+       (custom.use_ambiguities [PINYIN_AmbEngEn] &&
+        (lhs == PINYIN_Eng && rhs == PINYIN_En)) ||
+
+       (custom.use_ambiguities [PINYIN_AmbInIng] &&
+        (lhs == PINYIN_In && rhs == PINYIN_Ing)) ||
+       (custom.use_ambiguities [PINYIN_AmbIngIn] &&
+        (lhs == PINYIN_Ing && rhs == PINYIN_In))
+       )
         return 0;
-    else if (custom.use_incomplete && (lhs == PINYIN_ZeroFinal || rhs == PINYIN_ZeroFinal))
+    else if (custom.use_incomplete &&
+             (lhs == PINYIN_ZeroFinal || rhs == PINYIN_ZeroFinal))
         return 0;
     else return (lhs - rhs);
 }
index 86d4e0c..76c0885 100644 (file)
@@ -36,17 +36,27 @@ namespace pinyin{
 enum PinyinAmbiguity
 {
     PINYIN_AmbAny= 0,
-    PINYIN_AmbZhiZi,
+    PINYIN_AmbCiChi,
     PINYIN_AmbChiCi,
+    PINYIN_AmbZiZhi,
+    PINYIN_AmbZhiZi,
+    PINYIN_AmbSiShi,
     PINYIN_AmbShiSi,
+    PINYIN_AmbLeNe,
     PINYIN_AmbNeLe,
-    PINYIN_AmbLeRi,
     PINYIN_AmbFoHe,
+    PINYIN_AmbHeFo,
+    PINYIN_AmbLeRi,
+    PINYIN_AmbRiLe,
+    PINYIN_AmbKeGe,
     PINYIN_AmbGeKe,
     PINYIN_AmbAnAng,
+    PINYIN_AmbAngAn,
     PINYIN_AmbEnEng,
+    PINYIN_AmbEngEn,
     PINYIN_AmbInIng,
-    PINYIN_AmbLast = PINYIN_AmbInIng
+    PINYIN_AmbIngIn,
+    PINYIN_AmbLast = PINYIN_AmbIngIn
 };
 
 /**
index d9094a5..29febce 100644 (file)
@@ -103,8 +103,8 @@ int PinyinBitmapIndexLevel::initial_level_search(int phrase_length,
 
 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER)  case ORIGIN:                        \
     {                                                                   \
-       result |= final_level_search((PinyinInitial)first_key.m_initial,\
-                                   phrase_length, keys, ranges);               \
+       result |= final_level_search((PinyinInitial)first_key.m_initial, \
+                                     phrase_length, keys, ranges);      \
        if ( custom.use_ambiguities [AMBIGUITY] ){                      \
            result |= final_level_search(ANOTHER,                       \
                                         phrase_length, keys, ranges);  \
@@ -119,28 +119,28 @@ int PinyinBitmapIndexLevel::initial_level_search(int phrase_length,
     PinyinCustomSettings &  custom= *m_custom;
     
     switch(first_key.m_initial){
-       
-       MATCH(PINYIN_AmbZhiZi, PINYIN_Zi, PINYIN_Zhi);
-       MATCH(PINYIN_AmbZhiZi, PINYIN_Zhi, PINYIN_Zi);
-       MATCH(PINYIN_AmbChiCi, PINYIN_Ci, PINYIN_Chi);
+
+       MATCH(PINYIN_AmbCiChi, PINYIN_Ci, PINYIN_Chi);
        MATCH(PINYIN_AmbChiCi, PINYIN_Chi, PINYIN_Ci);
-       MATCH(PINYIN_AmbShiSi, PINYIN_Si, PINYIN_Shi);
+       MATCH(PINYIN_AmbZiZhi, PINYIN_Zi, PINYIN_Zhi);
+       MATCH(PINYIN_AmbZhiZi, PINYIN_Zhi, PINYIN_Zi);
+       MATCH(PINYIN_AmbSiShi, PINYIN_Si, PINYIN_Shi);
        MATCH(PINYIN_AmbShiSi, PINYIN_Shi, PINYIN_Si);
-       MATCH(PINYIN_AmbLeRi, PINYIN_Ri, PINYIN_Le);
+       MATCH(PINYIN_AmbRiLe, PINYIN_Ri, PINYIN_Le);
        MATCH(PINYIN_AmbNeLe, PINYIN_Ne, PINYIN_Le);
        MATCH(PINYIN_AmbFoHe, PINYIN_Fo, PINYIN_He);
-       MATCH(PINYIN_AmbFoHe, PINYIN_He, PINYIN_Fo);
+       MATCH(PINYIN_AmbHeFo, PINYIN_He, PINYIN_Fo);
         MATCH(PINYIN_AmbGeKe, PINYIN_Ge, PINYIN_Ke);
-        MATCH(PINYIN_AmbGeKe, PINYIN_Ke, PINYIN_Ge);
+        MATCH(PINYIN_AmbKeGe, PINYIN_Ke, PINYIN_Ge);
 
     case PINYIN_Le:
        {
            result |= final_level_search((PinyinInitial)first_key.m_initial, 
-                                       phrase_length, keys, ranges);  
-           if ( custom.use_ambiguities [PINYIN_AmbLeRi] )              
+                                         phrase_length, keys, ranges);
+           if ( custom.use_ambiguities [PINYIN_AmbLeRi] )
                result |= final_level_search(PINYIN_Ri, phrase_length,
                                             keys, ranges);     
-           if ( custom.use_ambiguities [PINYIN_AmbNeLe] )              
+           if ( custom.use_ambiguities [PINYIN_AmbLeNe] )
                result |= final_level_search(PINYIN_Ne, phrase_length, 
                                             keys, ranges);
            return result;
@@ -161,15 +161,15 @@ int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial,
                                               /* out */ PhraseIndexRanges ranges) const{
 #define MATCH(AMBIGUITY, ORIGIN, ANOTHER) case ORIGIN:                         \
     {                                                                  \
-       result = tone_level_search(initial,(PinyinFinal) first_key.m_final,\
-                                  phrase_length, keys, ranges);                \
+       result = tone_level_search(initial,(PinyinFinal) first_key.m_final, \
+                                  phrase_length, keys, ranges);        \
        if ( custom.use_ambiguities [AMBIGUITY] ){                      \
            result |= tone_level_search(initial, ANOTHER,               \
                                        phrase_length, keys, ranges);   \
        }                                                               \
        return result;                                                  \
     }
-    
+
     int result = SEARCH_NONE;
     PinyinKey& first_key = keys[0];
     PinyinCustomSettings &  custom= *m_custom;
@@ -187,12 +187,12 @@ int PinyinBitmapIndexLevel::final_level_search(PinyinInitial initial,
        }
        
        MATCH(PINYIN_AmbAnAng, PINYIN_An, PINYIN_Ang);
-       MATCH(PINYIN_AmbAnAng, PINYIN_Ang, PINYIN_An);
+       MATCH(PINYIN_AmbAngAn, PINYIN_Ang, PINYIN_An);
        MATCH(PINYIN_AmbEnEng, PINYIN_En, PINYIN_Eng);
-       MATCH(PINYIN_AmbEnEng, PINYIN_Eng, PINYIN_En);
+       MATCH(PINYIN_AmbEngEn, PINYIN_Eng, PINYIN_En);
        MATCH(PINYIN_AmbInIng, PINYIN_In, PINYIN_Ing);
-       MATCH(PINYIN_AmbInIng, PINYIN_Ing, PINYIN_In);
-       
+       MATCH(PINYIN_AmbIngIn, PINYIN_Ing, PINYIN_In);
+
     default:
        {
            return tone_level_search(initial,(PinyinFinal)first_key.m_final,