From: Peng Wu
Date: Tue, 30 Oct 2012 08:18:21 +0000 (+0800)
Subject: write SingleGram::mask_out
X-Git-Tag: 0.8.91~73
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ef7b4c730872bb0312e7cdf0d10965881931dcd0;p=platform%2Fupstream%2Flibpinyin.git

write SingleGram::mask_out
---

Review notes (not part of the commit message):
 - get_total_freq()/set_total_freq() are no longer called inside assert(),
   so they still run when the file is compiled with NDEBUG.
 - mask_out() no longer steps "cur" back before "begin" after removing the
   first item; it simply does not advance after a removal.
 - documented the two new members in ngram.h.
 - hunk line counts were updated for the revised content; the "index" blob
   ids below still refer to the originally published revision.

diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index d366192..2b88284 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -55,6 +55,65 @@ bool SingleGram::set_total_freq(guint32 total){
     return true;
 }
 
+guint32 SingleGram::get_length(){
+    /* get the number of items stored after the total freq header. */
+    const SingleGramItem * begin = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
+
+    const guint32 length = end - begin;
+
+    if (0 == length) {
+        /* no items here, total freq should be zero. */
+        guint32 total_freq = 0;
+        /* call outside assert() so it is not compiled out by NDEBUG. */
+        bool retval = get_total_freq(total_freq);
+        assert(retval);
+        assert(0 == total_freq);
+        (void) retval;
+    }
+
+    return length;
+}
+
+guint32 SingleGram::mask_out(phrase_token_t mask, phrase_token_t value){
+    guint32 removed_items = 0;
+
+    guint32 total_freq = 0;
+    /* call outside assert() so it is not compiled out by NDEBUG. */
+    bool retval = get_total_freq(total_freq);
+    assert(retval);
+
+    const SingleGramItem * begin = (const SingleGramItem *)
+        ((const char *)(m_chunk.begin()) + sizeof(guint32));
+    const SingleGramItem * end = (const SingleGramItem *) m_chunk.end();
+
+    const SingleGramItem * cur = begin;
+    while (cur != end) {
+        if ((mask & cur->m_token) != value) {
+            ++cur;
+            continue;
+        }
+
+        total_freq -= cur->m_freq;
+        size_t offset = sizeof(guint32) +
+            sizeof(SingleGramItem) * (cur - begin);
+        m_chunk.remove_content(offset, sizeof(SingleGramItem));
+
+        /* update chunk end; cur is deliberately not advanced, so the
+         * slot the original loop re-examined via "--cur; ++cur" is
+         * checked again without stepping back before begin. */
+        end = (const SingleGramItem *) m_chunk.end();
+        ++removed_items;
+    }
+
+    retval = set_total_freq(total_freq);
+    assert(retval);
+    (void) retval;
+
+    return removed_items;
+}
+
 bool SingleGram::prune(){
     assert(false);
 #if 0
diff --git a/src/storage/ngram.h b/src/storage/ngram.h
index 9bf4190..9509155 100644
--- a/src/storage/ngram.h
+++ b/src/storage/ngram.h
@@ -150,6 +150,26 @@ public:
      *
      */
     bool set_total_freq(guint32 total);
+
+    /**
+     * SingleGram::get_length:
+     * @returns: the number of items in the single gram.
+     *
+     * Get the number of items in the single gram.
+     *
+     */
+    guint32 get_length();
+
+    /**
+     * SingleGram::mask_out:
+     * @mask: the bits of each item token to compare.
+     * @value: an item is removed when (token & mask) equals this value.
+     * @returns: the number of removed items.
+     *
+     * Remove the matched items and subtract their freqs from the total freq.
+     *
+     */
+    guint32 mask_out(phrase_token_t mask, phrase_token_t value);
 
     /**
      * SingleGram::prune: