From 6f3ee371d7118ac04327a7654a519657e8981edd Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Wed, 18 Aug 2010 14:58:57 +0800 Subject: [PATCH] add get_range to phrase index --- src/storage/phrase_index.cpp | 24 ++++++++++++++++++++++++ src/storage/phrase_index.h | 7 +++++++ utils/storage/export_interpolation.cpp | 14 +++++++------- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp index c122803..d7fb4fd 100644 --- a/src/storage/phrase_index.cpp +++ b/src/storage/phrase_index.cpp @@ -327,3 +327,27 @@ bool FacadePhraseIndex::load_text(guint8 phrase_index, FILE * infile){ m_total_freq += m_sub_phrase_indices[phrase_index]->get_phrase_index_total_freq(); return true; } + +int FacadePhraseIndex::get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range){ /* Fill range with the token range of the sub phrase index stored at slot phrase_index; returns ERROR_OK, ERROR_NO_SUB_PHRASE_INDEX, or the non-zero code reported by the sub index. */ + SubPhraseIndex * sub_phrase = m_sub_phrase_indices[phrase_index]; + if ( !sub_phrase ) + return ERROR_NO_SUB_PHRASE_INDEX; /* slot phrase_index holds a null SubPhraseIndex pointer */ + + int result = sub_phrase->get_range(range); + if ( result ) + return result; /* any non-zero code from the sub index is passed back unchanged */ + + range.m_range_begin = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_begin); + range.m_range_end = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_end); /* both bounds are re-encoded through PHRASE_INDEX_MAKE_TOKEN together with phrase_index */ + return ERROR_OK; +} + +int SubPhraseIndex::get_range(/* out */ PhraseIndexRange & range){ /* Report this sub index's range: m_range_begin is set to 0 and m_range_end to the number of table_offset_t elements between m_phrase_index.begin() and m_phrase_index.end(); always returns ERROR_OK. */ + const table_offset_t * begin = (const table_offset_t *)m_phrase_index.begin(); + const table_offset_t * end = (const table_offset_t *)m_phrase_index.end(); + + range.m_range_begin = 0; + range.m_range_end = end - begin; /* pointer difference, so the value counts table_offset_t elements rather than bytes */ + + return ERROR_OK; +} diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h index 0b532b1..3f94bd3 100755 --- a/src/storage/phrase_index.h +++ b/src/storage/phrase_index.h @@ -161,10 +161,14 @@ public: } } + /* binary memory chunk load/store method */ bool load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end); bool store(MemoryChunk * new_chunk, table_offset_t offset, table_offset_t & end); + + /* get token range in this sub phrase */ + int 
get_range(/* out */ PhraseIndexRange & range); /* Zero-gram */ guint32 get_phrase_index_total_freq(); @@ -206,6 +210,9 @@ public: bool store(guint8 phrase_index, MemoryChunk * new_chunk); bool unload(guint8 phrase_index); + /* get each sub phrase token range with phrase_index added */ + int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range); + /* Zero-gram */ guint32 get_phrase_index_total_freq(){ return m_total_freq; diff --git a/utils/storage/export_interpolation.cpp b/utils/storage/export_interpolation.cpp index e91fd74..43a2c61 100644 --- a/utils/storage/export_interpolation.cpp +++ b/utils/storage/export_interpolation.cpp @@ -53,16 +53,16 @@ int main(int argc, char * argv[]){ void gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) { fprintf(output, "\\1-gram\n"); for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; i++) { - /* Generate each phrase index library */ - const phrase_token_t min = PHRASE_INDEX_MAKE_TOKEN(i, token_min); - const phrase_token_t max = PHRASE_INDEX_MAKE_TOKEN(i, token_max); + + PhraseIndexRange range; + int result = phrase_index->get_range(i, range); + if ( result ) + continue; PhraseItem item; - for ( size_t j = min; j < max; j++) { + for ( size_t j = range.m_range_begin; j < range.m_range_end; j++) { int result = phrase_index->get_phrase_item(j, item); - if ( result == ERROR_NO_SUB_PHRASE_INDEX || - result == ERROR_OUT_OF_RANGE) - break; + if ( result == ERROR_NO_ITEM ) continue; assert( result == ERROR_OK); -- 2.7.4