m_total_freq += m_sub_phrase_indices[phrase_index]->get_phrase_index_total_freq();
return true;
}
+
+/* Report the token range of one sub phrase index as facade-level tokens.
+ *
+ * phrase_index selects the slot in m_sub_phrase_indices; range is an
+ * output parameter.
+ *
+ * Returns ERROR_NO_SUB_PHRASE_INDEX when the slot holds a null pointer,
+ * any non-zero status from SubPhraseIndex::get_range() unchanged, and
+ * ERROR_OK otherwise. */
+int FacadePhraseIndex::get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range){
+    SubPhraseIndex * const library = m_sub_phrase_indices[phrase_index];
+    if ( !library )
+        return ERROR_NO_SUB_PHRASE_INDEX;
+
+    /* Ask the sub index for its own range first. */
+    const int status = library->get_range(range);
+    if ( 0 != status )
+        return status;
+
+    /* Rebuild both bounds through PHRASE_INDEX_MAKE_TOKEN so that
+     * they are combined with phrase_index. */
+    range.m_range_begin = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_begin);
+    range.m_range_end = PHRASE_INDEX_MAKE_TOKEN(phrase_index, range.m_range_end);
+    return ERROR_OK;
+}
+
+/* Report the token range held by this sub phrase index.
+ *
+ * The range begins at 0 and ends at the number of table_offset_t sized
+ * entries between m_phrase_index.begin() and m_phrase_index.end().
+ * Always returns ERROR_OK. */
+int SubPhraseIndex::get_range(/* out */ PhraseIndexRange & range){
+    const table_offset_t * const first =
+        (const table_offset_t *)m_phrase_index.begin();
+    const table_offset_t * const last =
+        (const table_offset_t *)m_phrase_index.end();
+
+    range.m_range_begin = 0;
+    /* Pointer difference: counts table_offset_t entries, not bytes. */
+    range.m_range_end = last - first;
+
+    return ERROR_OK;
+}
}
}
+ /* binary memory chunk load/store methods */
bool load(MemoryChunk * chunk,
table_offset_t offset, table_offset_t end);
bool store(MemoryChunk * new_chunk,
table_offset_t offset, table_offset_t & end);
+
+ /* get the token range of this sub phrase index */
+ int get_range(/* out */ PhraseIndexRange & range);
/* Zero-gram */
guint32 get_phrase_index_total_freq();
bool store(guint8 phrase_index, MemoryChunk * new_chunk);
bool unload(guint8 phrase_index);
+ /* get the token range of the sub phrase index at phrase_index, with phrase_index combined into the returned tokens */
+ int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range);
+
/* Zero-gram */
guint32 get_phrase_index_total_freq(){
return m_total_freq;
void gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) {
fprintf(output, "\\1-gram\n");
for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; i++) {
- /* Generate each phrase index library */
- const phrase_token_t min = PHRASE_INDEX_MAKE_TOKEN(i, token_min);
- const phrase_token_t max = PHRASE_INDEX_MAKE_TOKEN(i, token_max);
+
+ PhraseIndexRange range;
+ int result = phrase_index->get_range(i, range);
+ if ( result )
+ continue;
PhraseItem item;
- for ( size_t j = min; j < max; j++) {
+ for ( size_t j = range.m_range_begin; j < range.m_range_end; j++) {
int result = phrase_index->get_phrase_item(j, item);
- if ( result == ERROR_NO_SUB_PHRASE_INDEX ||
- result == ERROR_OUT_OF_RANGE)
- break;
+
if ( result == ERROR_NO_ITEM )
continue;
assert( result == ERROR_OK);