add load/store method to phrase table
author: Peng Wu <alexepico@gmail.com>
Mon, 6 Sep 2010 08:13:14 +0000 (16:13 +0800)
committer: Peng Wu <alexepico@gmail.com>
Mon, 6 Sep 2010 08:13:14 +0000 (16:13 +0800)
src/storage/phrase_large_table.cpp

index b115ceb..53db455 100644 (file)
@@ -335,3 +335,187 @@ bool PhraseLargeTable::load_text(FILE * infile){
     }
     return true;
 }
+/*
+ * Rebuild this bitmap index level from the serialized image held in chunk.
+ *
+ * Layout read, starting at byte `offset` of the chunk buffer:
+ *   - (PHRASE_Number_Of_Bitmap_Index + 1) table_offset_t values; entries
+ *     i and i + 1 delimit the serialized data of slot i,
+ *   - one c_separate byte directly after that table.
+ * Equal neighbouring entries mean slot i has no data and the slot is skipped.
+ * Otherwise a PhraseLengthIndexLevel is allocated, stored in
+ * m_phrase_length_indexes[i] and loaded from [phrase_begin, phrase_end - 1);
+ * the byte at phrase_end - 1 is expected to be c_separate.
+ *
+ * `end` is used only inside an assert, as the upper bound for each phrase_end.
+ * Every consistency check here is an assert, so (assuming the standard
+ * assert macro) none of them run when NDEBUG is defined.
+ * Always returns true; the result of the child load() is ignored.
+ */
+bool PhraseBitmapIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
+                                  table_offset_t end){
+    reset(); // presumably drops previously loaded children; reset() is defined elsewhere
+    char * buf_begin = (char *) chunk->begin();
+    table_offset_t phrase_begin, phrase_end;
+    table_offset_t * index = (table_offset_t *) (buf_begin + offset);
+    phrase_end = *index; // first table entry: where the data of slot 0 begins
+
+    for ( size_t i = 0; i < PHRASE_Number_Of_Bitmap_Index; ++i) {
+        phrase_begin = phrase_end;
+        index++;
+        phrase_end = *index;
+        if ( phrase_begin == phrase_end ) // empty range: nothing was stored for slot i
+            continue;
+        PhraseLengthIndexLevel * phrases = new PhraseLengthIndexLevel;
+        m_phrase_length_indexes[i] = phrases;
+        phrases->load(chunk, phrase_begin, phrase_end - 1); // range excludes the trailing separator byte
+        assert( phrase_end <= end ); // NOTE(review): bound is checked only after the child has read the range
+        assert( *(buf_begin + phrase_end - 1) == c_separate);
+    }
+    offset += (PHRASE_Number_Of_Bitmap_Index + 1) * sizeof(table_offset_t); // step past the offset table
+    assert( c_separate == *(buf_begin + offset) ); // a separator byte must follow the table
+    return true;
+}
+/*
+ * Serialize this bitmap index level into new_chunk, starting at `offset`.
+ *
+ * Bytes written, in file order:
+ *   - a table of (PHRASE_Number_Of_Bitmap_Index + 1) table_offset_t values,
+ *   - one c_separate byte,
+ *   - for every non-null slot: the child's serialized data followed by one
+ *     c_separate byte.
+ * Table entry 0 is the offset just after the first separator; entry i + 1 is
+ * the offset after slot i has been handled. A null slot repeats the current
+ * offset, so its two neighbouring entries are equal.
+ *
+ * On return `end` holds the offset one past the last byte written.
+ * Always returns true; results of set_content() and the child store() are
+ * not checked.
+ */
+bool PhraseBitmapIndexLevel::store(MemoryChunk * new_chunk,
+                                   table_offset_t offset,
+                                   table_offset_t & end){
+    table_offset_t phrase_end;
+    table_offset_t index = offset; // write cursor inside the offset table
+    offset += (PHRASE_Number_Of_Bitmap_Index + 1) * sizeof(table_offset_t); // reserve room for the table
+    // write the c_separate byte ('#') that closes the offset table
+    new_chunk->set_content(offset, &c_separate, sizeof(char));
+    offset +=sizeof(char);
+    new_chunk->set_content(index, &offset, sizeof(table_offset_t)); // entry 0: start of the payload area
+    index += sizeof(table_offset_t);
+    for ( size_t i = 0; i < PHRASE_Number_Of_Bitmap_Index; ++i) {
+        PhraseLengthIndexLevel * phrases = m_phrase_length_indexes[i];
+        if ( !phrases ) { // empty slot: record an unchanged offset so the range is empty
+            new_chunk->set_content(index, &offset, sizeof(table_offset_t));
+            index += sizeof(table_offset_t);
+            continue;
+        }
+        phrases->store(new_chunk, offset, phrase_end); // child reports where its data ends via phrase_end; the closing '#' is appended below
+        offset = phrase_end;
+        // append the c_separate byte ('#') after the child's data
+        new_chunk->set_content(offset, &c_separate, sizeof(char));
+        offset += sizeof(char);
+        new_chunk->set_content(index, &offset, sizeof(table_offset_t)); // entry i + 1: end of slot i, separator included
+        index += sizeof(table_offset_t);
+    }
+    end = offset;
+    return true;
+}
+/*
+ * Rebuild this length index level from the serialized image held in chunk.
+ *
+ * Layout read, starting at byte `offset` of the chunk buffer:
+ *   - a guint32 entry count (nindex),
+ *   - (nindex + 1) table_offset_t values; entries i and i + 1 delimit the
+ *     serialized data of entry i,
+ *   - one c_separate byte directly after that table.
+ * For each i an element is appended to m_phrase_array_indexes: a NULL
+ * pointer when the range is empty, otherwise a new PhraseArrayIndexLevel<i>
+ * loaded from [phrase_begin, phrase_end - 1). Only i in 0..15 is supported;
+ * any other value reaches assert(false).
+ *
+ * `end` is used only inside an assert, as the upper bound for each phrase_end.
+ * Every consistency check here is an assert, so (assuming the standard
+ * assert macro) none of them run when NDEBUG is defined.
+ * Always returns true; the result of the child load() is ignored.
+ */
+bool PhraseLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
+    char * buf_begin = (char *) chunk->begin();
+    guint32 nindex = *((guint32 *)(buf_begin + offset));
+    table_offset_t * index = (table_offset_t *)
+        (buf_begin + offset + sizeof(guint32)); // offset table starts right after the count
+
+    table_offset_t phrase_begin, phrase_end = *index;
+    m_phrase_array_indexes = g_array_new(FALSE, TRUE, sizeof(void *)); // NOTE(review): an array already held by this member is not freed here; confirm load() only runs on a fresh object
+    for ( size_t i = 0; i < nindex; ++i) {
+        phrase_begin = phrase_end;
+        index++;
+        phrase_end = *index;
+        if ( phrase_begin == phrase_end ){ // empty range: keep the position with a NULL element
+            void * null = NULL; // g_array_append_val takes the address of its argument, so a named variable is needed
+            g_array_append_val(m_phrase_array_indexes, null);
+            continue;
+        }
+
+#define CASE(len) case len:                                             \
+        {                                                               \
+            PhraseArrayIndexLevel<len> * phrase = new PhraseArrayIndexLevel<len>; \
+            phrase->load(chunk, phrase_begin, phrase_end - 1);          \
+            assert( *(buf_begin + phrase_end - 1) == c_separate);       \
+            assert( phrase_end <= end );                                \
+            g_array_append_val(m_phrase_array_indexes, phrase);         \
+            break;                                                      \
+        }
+        switch ( i ){ // the template argument must be a compile-time constant, hence one case per index
+           CASE(0);
+           CASE(1);
+           CASE(2);
+           CASE(3);
+           CASE(4);
+           CASE(5);
+           CASE(6);
+           CASE(7);
+           CASE(8);
+           CASE(9);
+           CASE(10);
+           CASE(11);
+           CASE(12);
+           CASE(13);
+           CASE(14);
+           CASE(15);
+       default:
+           assert(false); // NOTE(review): with asserts disabled nothing is appended for this i, shifting later elements
+        }
+#undef CASE
+    }
+    offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); // step past the count and the offset table
+    assert ( c_separate == * (buf_begin + offset) ); // a separator byte must follow the table
+    return true;
+}
+/*
+ * Serialize this length index level into new_chunk, starting at `offset`.
+ *
+ * Bytes written, in file order:
+ *   - the element count of m_phrase_array_indexes as a guint32,
+ *   - a table of (count + 1) table_offset_t values,
+ *   - one c_separate byte,
+ *   - for every non-null element: its serialized data followed by one
+ *     c_separate byte.
+ * Table entry 0 is the offset just after the first separator; entry i + 1 is
+ * the offset after element i has been handled. A null element repeats the
+ * current offset, so its two neighbouring entries are equal.
+ * Element i is treated as a PhraseArrayIndexLevel<i>; only i in 0..15 is
+ * supported and any other value reaches assert(false).
+ *
+ * On return `end` holds the offset one past the last byte written.
+ * Always returns true; results of set_content() and the child store() are
+ * not checked.
+ */
+bool PhraseLengthIndexLevel::store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
+    guint32 nindex = m_phrase_array_indexes->len;
+    new_chunk->set_content(offset, &nindex, sizeof(guint32));
+    table_offset_t index = offset + sizeof(guint32); // write cursor inside the offset table
+
+    offset += sizeof(guint32) + (nindex + 1) * sizeof(table_offset_t); // reserve room for the count and the table
+    new_chunk->set_content(offset, &c_separate, sizeof(char)); // separator that closes the offset table
+    offset += sizeof(char);
+    new_chunk->set_content(index, &offset, sizeof(table_offset_t)); // entry 0: start of the payload area
+    index += sizeof(table_offset_t);
+
+    table_offset_t phrase_end;
+    for ( size_t i = 0; i < m_phrase_array_indexes->len; ++i) {
+#define CASE(len) case len:                                             \
+        {                                                               \
+            PhraseArrayIndexLevel<len> * phrase = g_array_index         \
+                (m_phrase_array_indexes, PhraseArrayIndexLevel<len> *, i); \
+            if ( !phrase ){                                             \
+                new_chunk->set_content                                  \
+                    (index, &offset, sizeof(table_offset_t));           \
+                index += sizeof(table_offset_t);                        \
+                continue;                                               \
+            }                                                           \
+            phrase->store(new_chunk, offset, phrase_end);               \
+            offset = phrase_end;                                        \
+            break;                                                      \
+        }
+        switch ( i ){ // 'continue' inside CASE targets the for loop, skipping the separator and table write below
+           CASE(0);
+           CASE(1);
+           CASE(2);
+           CASE(3);
+           CASE(4);
+           CASE(5);
+           CASE(6);
+           CASE(7);
+           CASE(8);
+           CASE(9);
+           CASE(10);
+           CASE(11);
+           CASE(12);
+           CASE(13);
+           CASE(14);
+           CASE(15);
+       default:
+           assert(false);
+        }
+        // append the c_separate byte ('#') after the element's data
+        new_chunk->set_content(offset, &c_separate, sizeof(char));
+        offset += sizeof(char);
+        new_chunk->set_content(index, &offset, sizeof(table_offset_t)); // entry i + 1: end of element i, separator included
+        index += sizeof(table_offset_t);
+
+#undef CASE
+    }
+    end = offset;
+    return true;
+}
+/*
+ * Attach this array index level to the byte range [offset, end) of chunk.
+ *
+ * The range is handed to m_chunk.set_chunk() as (start pointer, length, NULL).
+ * NOTE(review): whether set_chunk() copies the bytes or merely references
+ * them, and what the NULL third argument means, is decided by MemoryChunk,
+ * which is not visible here; confirm the lifetime requirements on `chunk`.
+ * No validation of offset or end is performed. Always returns true.
+ */
+template<size_t phrase_length>
+bool PhraseArrayIndexLevel<phrase_length>::
+load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end){
+    char * buf_begin = (char *) chunk->begin();
+    m_chunk.set_chunk(buf_begin + offset, end - offset, NULL); // length is the distance between the two offsets
+    return true;
+}
+/*
+ * Write the contents of m_chunk into new_chunk at `offset`.
+ *
+ * Exactly m_chunk.size() bytes starting at m_chunk.begin() are passed to
+ * set_content(); no c_separate byte is written by this function.
+ * On return `end` is offset + m_chunk.size(), i.e. one past the last byte
+ * written. Always returns true; the result of set_content() is not checked.
+ */
+template<size_t phrase_length>
+bool PhraseArrayIndexLevel<phrase_length>::
+store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end) {
+    new_chunk->set_content(offset, m_chunk.begin(), m_chunk.size());
+    end = offset + m_chunk.size();
+    return true;
+}