write add_pronunciation
author Peng Wu <alexepico@gmail.com>
Thu, 28 Mar 2013 04:21:41 +0000 (12:21 +0800)
committer Peng Wu <alexepico@gmail.com>
Thu, 28 Mar 2013 04:32:01 +0000 (12:32 +0800)
src/pinyin.cpp
src/storage/phrase_index.cpp
src/storage/phrase_index.h
tests/storage/test_phrase_index.cpp

index 1c31b1a..918d4b9 100644 (file)
@@ -415,8 +415,8 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
         retval = phrase_index->remove_phrase_item(token, removed_item);
         if (ERROR_OK == retval) {
             /* maybe check whether there are duplicated pronunciations here. */
-            removed_item->append_pronunciation((ChewingKey *)keys->data,
-                                               count);
+            removed_item->add_pronunciation((ChewingKey *)keys->data,
+                                            count);
             phrase_index->add_phrase_item(token, removed_item);
             delete removed_item;
             result = true;
@@ -439,7 +439,7 @@ bool pinyin_iterator_add_phrase(import_iterator_t * iter,
                     (keys->len, (ChewingKey *)(keys->data), token);
 
                 item.set_phrase_string(len_phrase, ucs4_phrase);
-                item.append_pronunciation((ChewingKey *)(keys->data), count);
+                item.add_pronunciation((ChewingKey *)(keys->data), count);
                 phrase_index->add_phrase_item(token, &item);
                 phrase_index->add_unigram_frequency(token,
                                                     count * unigram_factor);
index 67afb97..c462fef 100644 (file)
@@ -42,6 +42,7 @@ bool PhraseItem::get_nth_pronunciation(size_t index, ChewingKey * keys,
         (offset + phrase_length * sizeof(ChewingKey), &freq , sizeof(guint32));
 }
 
+#if 0
 void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){
     guint8 phrase_length = get_phrase_length();
     set_n_pronunciation(get_n_pronunciation() + 1);
@@ -49,6 +50,43 @@ void PhraseItem::append_pronunciation(ChewingKey * keys, guint32 freq){
                         phrase_length * sizeof(ChewingKey));
     m_chunk.set_content(m_chunk.size(), &freq, sizeof(guint32));
 }
+#endif
+
+bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){ /*
+     * Merge one pronunciation into this phrase item.  If a stored
+     * pronunciation compares equal to @keys (pinyin_exact_compare2
+     * returns 0), @delta is added to that entry's frequency; otherwise
+     * @keys is written as a new entry whose frequency is @delta.
+     * Returns false only when the overflow guard below rejects the
+     * update; every other path returns true.
+     */
+    guint8 phrase_length = get_phrase_length(); /* number of ChewingKeys in each stored pronunciation */
+    guint8 npron = get_n_pronunciation();
+    size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t); /* byte offset of pronunciation record 0 */
+    char * buf_begin = (char *) m_chunk.begin();
+    guint32 total_freq = 0; /* running sum of the frequencies visited so far */
+
+    for (int i = 0; i < npron; ++i) { /* each record: phrase_length keys, then one guint32 frequency */
+        char * chewing_begin = buf_begin + offset +
+            i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
+        guint32 * freq = (guint32 *)(chewing_begin +
+                                     phrase_length * sizeof(ChewingKey)); /* points into m_chunk, so writes through it update the item in place */
+
+        total_freq += *freq;
+
+        if (0 == pinyin_exact_compare2
+            (keys, (ChewingKey *)chewing_begin, phrase_length)) {
+            /* found the exact match pinyin keys: update this entry's
+             * frequency instead of storing a duplicate pronunciation. */
+
+           /* protect against total_freq overflow.
+            * NOTE(review): total_freq only sums the entries up to and
+            * including the matched one; entries after it are not part
+            * of this check. */
+            if (delta > 0 && total_freq > total_freq + delta)
+                return false;
+
+            *freq += delta;
+            total_freq += delta; /* not read again before the return below */
+            return true;
+        }
+    }
+
+    set_n_pronunciation(npron + 1); /* no stored entry matched: record one more pronunciation (no overflow check on this path) */
+    m_chunk.set_content(m_chunk.size(), keys,
+                        phrase_length * sizeof(ChewingKey)); /* keys are written at offset m_chunk.size() */
+    m_chunk.set_content(m_chunk.size(), &delta, sizeof(guint32)); /* @delta is stored as the new entry's frequency, after its keys */
+    return true;
+}
 
 void PhraseItem::remove_nth_pronunciation(size_t index){
     guint8 phrase_length = get_phrase_length();
@@ -74,21 +112,25 @@ void PhraseItem::increase_pronunciation_possibility(pinyin_option_t options,
                                                     gint32 delta){
     guint8 phrase_length = get_phrase_length();
     guint8 npron = get_n_pronunciation();
-    size_t offset = phrase_item_header + phrase_length * sizeof ( ucs4_t );
+    size_t offset = phrase_item_header + phrase_length * sizeof(ucs4_t);
     char * buf_begin = (char *) m_chunk.begin();
     guint32 total_freq = 0;
-    for ( int i = 0 ; i < npron ; ++i){
+
+    for (int i = 0; i < npron; ++i) {
        char * chewing_begin = buf_begin + offset +
-           i * ( phrase_length * sizeof(ChewingKey) + sizeof(guint32) );
+           i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
        guint32 * freq = (guint32 *)(chewing_begin +
                                      phrase_length * sizeof(ChewingKey));
        total_freq += *freq;
-       if ( 0 == pinyin_compare_with_ambiguities2
-             (options, keys,
-              (ChewingKey *)chewing_begin, phrase_length) ){
-           //protect against total_freq overflow.
-           if ( delta > 0 && total_freq > total_freq + delta )
+
+       if (0 == pinyin_compare_with_ambiguities2
+            (options, keys,
+             (ChewingKey *)chewing_begin, phrase_length)) {
+
+           /* protect against total_freq overflow. */
+           if (delta > 0 && total_freq > total_freq + delta)
                return;
+
            *freq += delta;
            total_freq += delta;
        }
@@ -515,7 +557,7 @@ bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){
        parser.parse(options, keys, key_rests, pinyin, strlen(pinyin));
        
        if (item_ptr->get_phrase_length() == keys->len) {
-            item_ptr->append_pronunciation((ChewingKey *)keys->data, freq);
+            item_ptr->add_pronunciation((ChewingKey *)keys->data, freq);
         } else {
             fprintf(stderr, "FacadePhraseIndex::load_text:%s\t%s\n",
                     pinyin, phrase);
index 3654369..6a14ff7 100644 (file)
@@ -213,14 +213,15 @@ public:
                               /* out */ guint32 & freq);
 
     /**
-     * PhraseItem::append_pronunciation:
+     * PhraseItem::add_pronunciation:
      * @keys: the pronunciation keys.
-     * @freq: the frequency of the pronunciation.
+     * @delta: the delta of the frequency of the pronunciation.
+     * @returns: whether the add operation is successful.
      *
-     * Append one pronunciation.
+     * Add one pronunciation.
      *
      */
-    void append_pronunciation(ChewingKey * keys, guint32 freq);
+    bool add_pronunciation(ChewingKey * keys, guint32 delta);
 
     /**
      * PhraseItem::remove_nth_pronunciation:
index 807b29c..96f9ca0 100644 (file)
@@ -14,8 +14,8 @@ int main(int argc, char * argv[]){
 
 
     phrase_item.set_phrase_string(1, &string1);
-    phrase_item.append_pronunciation(&key1, 100);
-    phrase_item.append_pronunciation(&key2, 300);
+    phrase_item.add_pronunciation(&key1, 100);
+    phrase_item.add_pronunciation(&key2, 300);
 
     assert(phrase_item.get_phrase_length() == 1);
 
@@ -104,7 +104,7 @@ int main(int argc, char * argv[]){
 
     phrase_index.get_phrase_item(16777222, item2);
     assert(item2.get_phrase_length() == 1);
-    assert(item2.get_n_pronunciation() == 6);
+    assert(item2.get_n_pronunciation() == 2);
 
     return 0;
 }