write phrase index logger format in progress
author Peng Wu <alexepico@gmail.com>
Wed, 10 Aug 2011 04:58:13 +0000 (12:58 +0800)
committer Peng Wu <alexepico@gmail.com>
Wed, 10 Aug 2011 04:58:13 +0000 (12:58 +0800)
src/storage/phrase_index.cpp
src/storage/phrase_index.h
src/storage/phrase_index_logger.h

index 59b166f..2be0b5c 100644 (file)
@@ -296,6 +296,79 @@ bool SubPhraseIndex::store(MemoryChunk * new_chunk,
     return true;
 }
 
+bool SubPhraseIndex::diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger){
+    PhraseIndexRange oldrange, currange, range;
+    oldone->get_range(oldrange); get_range(currange);
+    range.m_range_begin = std_lite::min(oldrange.m_range_begin,
+                                        currange.m_range_begin);
+    range.m_range_end = std_lite::max(oldrange.m_range_end,
+                                     currange.m_range_end);
+    PhraseItem olditem, newitem;
+
+    for (phrase_token_t token = range.m_range_begin;
+         token < range.m_range_end; ++token ){
+        bool oldretval = ERROR_OK == oldone->get_phrase_item(token, olditem);
+        bool newretval = ERROR_OK == get_phrase_item(token, newitem);
+
+        if ( oldretval ){
+            if ( newretval ) { /* compare phrase item. */
+                if ( olditem == newitem )
+                    continue;
+                logger->append_record(LOG_MODIFY_RECORD, token,
+                                      &(olditem.m_chunk), &(newitem.m_chunk));
+            } else { /* remove phrase item. */
+                logger->append_record(LOG_REMOVE_RECORD, token,
+                                      &(olditem.m_chunk), NULL);
+            }
+        } else {
+            if ( newretval ){ /* add phrase item. */
+                logger->append_record(LOG_ADD_RECORD, token,
+                                      NULL, &(newitem.m_chunk));
+            } else { /* both empty. */
+                    /* do nothing. */
+            }
+        }
+    }
+
+    return true;
+}
+
+bool SubPhraseIndex::merge(PhraseIndexLogger * logger){
+    LOG_TYPE log_type; phrase_token_t token;
+    MemoryChunk oldchunk, newchunk;
+    PhraseItem olditem, newitem, * tmpitem;
+
+    while(logger->has_next_record()){
+        logger->next_record(log_type, token, &oldchunk, &newchunk);
+
+        switch(log_type){
+        case LOG_ADD_RECORD:{
+            assert( 0 == oldchunk.size() );
+            newitem.m_chunk.set_chunk(newchunk.begin(), newchunk.size(),
+                                      NULL);
+            add_phrase_item(token, &newitem);
+            break;
+        }
+        case LOG_REMOVE_RECORD:{
+            assert( 0 == newchunk.size() );
+            tmpitem = NULL;
+            remove_phrase_item(token, tmpitem);
+            olditem.m_chunk.set_chunk(oldchunk.begin(), oldchunk.size(),
+                                   NULL);
+            if (olditem != *tmpitem)
+                return false;
+            break;
+        }
+        case LOG_MODIFY_RECORD:{
+            TODO:
+            break;
+        }
+        default:
+            assert(false);
+        }
+    }
+}
+
 bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){
     SubPhraseIndex * & sub_phrases = m_sub_phrase_indices[phrase_index];
     if ( !sub_phrases ){
index f2648e8..d853aee 100644 (file)
@@ -67,10 +67,12 @@ public:
        memset(m_chunk.begin(), 0, m_chunk.size());
     }
 
-    PhraseItem(MemoryChunk chunk){
-       m_chunk = chunk;
-       assert ( m_chunk.size() >= phrase_item_header);
+#if 0
+    /* Disabled constructor: copies the content of chunk into m_chunk.
+     * chunk is a reference, so its members are reached with '.'. */
+    PhraseItem(MemoryChunk & chunk){
+        m_chunk.set_content(0, chunk.begin(), chunk.size());
+        assert ( m_chunk.size() >= phrase_item_header);
     }
+#endif
 
     /* functions */
     guint8 get_phrase_length(){
@@ -134,6 +136,17 @@ public:
      */
     void append_pronunciation(PinyinKey * pinyin, guint32 freq);
     void remove_nth_pronunciation(size_t index);
+
+    /* Two phrase items are equal when their chunks have the same size
+     * and the same bytes. */
+    bool operator == (PhraseItem & rhs){
+        return m_chunk.size() == rhs.m_chunk.size() &&
+            0 == memcmp(m_chunk.begin(), rhs.m_chunk.begin(),
+                        m_chunk.size());
+    }
+
+    /* Negation of operator ==. */
+    bool operator != (PhraseItem & rhs){
+        if (*this == rhs)
+            return false;
+        return true;
+    }
 };
 
 /*
index c961112..3cff9b8 100644 (file)
@@ -87,8 +87,8 @@ public:
     }
 
     /* prolog: has_next_record() returned true. */
-    bool next(LOG_TYPE & log_type, phrase_token_t & token,
-              MemoryChunk * oldone, MemoryChunk * newone){
+    bool next_record(LOG_TYPE & log_type, phrase_token_t & token,
+                     MemoryChunk * oldone, MemoryChunk * newone){
         size_t offset = m_offset;
         m_chunk->get_content(offset, &log_type, sizeof(LOG_TYPE));
         offset += sizeof(LOG_TYPE);
@@ -97,8 +97,7 @@ public:
 
         switch(log_type){
         case LOG_ADD_RECORD:{
-            assert( NULL == oldone);
-            assert( NULL != newone);
+            oldone->set_size(0);
             size_t len = 0;
             m_chunk->get_content(offset, &len, sizeof(size_t));
             offset += sizeof(size_t);
@@ -107,8 +106,7 @@ public:
             break;
         }
         case LOG_REMOVE_RECORD:{
-            assert( NULL != oldone);
-            assert( NULL == newone);
+            newone->set_size(0);
             size_t len = 0;
             m_chunk->get_content(offset, &len, sizeof(size_t));
             offset += sizeof(size_t);
@@ -117,8 +115,6 @@ public:
             break;
         }
         case LOG_MODIFY_RECORD:{
-            assert( NULL != oldone);
-            assert( NULL != newone);
             size_t oldlen = 0, newlen = 0;
             m_chunk->get_content(offset, &oldlen, sizeof(size_t));
             offset += sizeof(size_t);