src/storage/phrase_index.h

   1 /*
   2  *  libpinyin
   3  *  Library to deal with pinyin.
   4  *
   5  *  Copyright (C) 2006-2007 Peng Wu
   6  *
   7  *  This program is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  This program is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with this program; if not, write to the Free Software
  19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  20  */
  21
  22 #ifndef PHRASE_INDEX_H
  23 #define PHRASE_INDEX_H
  24
   #include <assert.h>
   #include <stdio.h>
   #include <string.h>
   #include <glib.h>
   #include "novel_types.h"
   #include "chewing_key.h"
   #include "pinyin_parser2.h"
   #include "pinyin_phrase2.h"
   #include "memory_chunk.h"
   #include "phrase_index_logger.h"
  33
  34 /**
  35  * Phrase Index File Format
  36  *
  37  * Indirect Index: Index by Token
  38  * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  39  * + Phrase Offset + Phrase Offset + Phrase Offset + ......  +
  40  * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  41  * Phrase Content:
  42  * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  43  * + Phrase Length + number of  Pronunciations  + Uni-gram Frequency+
  44  * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  45  * + Phrase String(UCS4) + n Pronunciations with Frequency +
  46  * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  47  */
  48
  49 namespace pinyin{
  50
   /* Delta information is stored by the phrase index logger in the
    * user's home directory.
    */
  53
  54 const size_t phrase_item_header = sizeof(guint8) + sizeof(guint8) + sizeof(guint32);
  55
  56 /**
  57  * PhraseItem:
  58  *
  59  * The PhraseItem to access the items in phrase index.
  60  *
  61  */
  62 class PhraseItem{
  63     friend class SubPhraseIndex;
  64     friend bool _compute_new_header(PhraseIndexLogger * logger,
  65                                     phrase_token_t mask,
  66                                     phrase_token_t value,
  67                                     guint32 & new_total_freq);
  68
  69 private:
  70     MemoryChunk m_chunk;
  71     bool set_n_pronunciation(guint8 n_prouns);
  72 public:
  73     /**
  74      * PhraseItem::PhraseItem:
  75      *
  76      * The constructor of the PhraseItem.
  77      *
  78      */
  79     PhraseItem(){
  80         m_chunk.set_size(phrase_item_header);
  81         memset(m_chunk.begin(), 0, m_chunk.size());
  82     }
  83
  84 #if 0
  85     PhraseItem(MemoryChunk & chunk){
  86         m_chunk.set_content(0, chunk->begin(), chunk->size());
  87         assert ( m_chunk.size() >= phrase_item_header);
  88     }
  89 #endif
  90
  91     /**
  92      * PhraseItem::get_phrase_length:
  93      * @returns: the length of this phrase item.
  94      *
  95      * Get the length of this phrase item.
  96      *
  97      */
  98     guint8 get_phrase_length(){
  99         char * buf_begin = (char *)m_chunk.begin();
 100         return (*(guint8 *)buf_begin);
 101     }
 102
 103     /**
 104      * PhraseItem::get_n_pronunciation:
 105      * @returns: the number of the pronunciations.
 106      *
 107      * Get the number of the pronunciations.
 108      *
 109      */
 110     guint8 get_n_pronunciation(){
 111         char * buf_begin = ( char *) m_chunk.begin();
 112         return (*(guint8 *)(buf_begin + sizeof(guint8)));
 113     }
 114
 115     /**
 116      * PhraseItem::get_unigram_frequency:
 117      * @returns: the uni-gram frequency of this phrase item.
 118      *
 119      * Get the uni-gram frequency of this phrase item.
 120      *
 121      */
 122     guint32 get_unigram_frequency(){
 123         char * buf_begin = (char *)m_chunk.begin();
 124         return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8)));
 125     }
 126
 127     /**
 128      * PhraseItem::get_pronunciation_possibility:
 129      * @options: the pinyin options.
 130      * @keys: the pronunciation keys.
 131      * @returns: the possibility of this phrase item pronounces the pinyin.
 132      *
 133      * Get the possibility of this phrase item pronounces the pinyin.
 134      *
 135      */
 136     gfloat get_pronunciation_possibility(pinyin_option_t options,
 137                                          ChewingKey * keys){
 138         guint8 phrase_length = get_phrase_length();
 139         guint8 npron = get_n_pronunciation();
 140         size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t);
 141         char * buf_begin = (char *)m_chunk.begin();
 142         guint32 matched = 0, total_freq =0;
 143         for ( int i = 0 ; i < npron ; ++i){
 144             char * chewing_begin = buf_begin + offset +
 145                 i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
 146             guint32 * freq = (guint32 *)(chewing_begin +
 147                                          phrase_length * sizeof(ChewingKey));
 148             total_freq += *freq;
 149             if ( 0 == pinyin_compare_with_ambiguities2
 150                  (options,  keys,
 151                   (ChewingKey *)chewing_begin,phrase_length) ){
 152                 matched += *freq;
 153             }
 154         }
 155
 156 #if 1
 157         /* an additional safe guard for chewing. */
 158         if ( 0 == total_freq )
 159             return 0;
 160 #endif
 161
 162         /* used preprocessor to avoid zero freq, in gen_chewing_table. */
 163         gfloat retval = matched / (gfloat) total_freq;
 164         return retval;
 165     }
 166
 167     /**
 168      * PhraseItem::increase_pronunciation_possibility:
 169      * @options: the pinyin options.
 170      * @keys: the pronunciation keys.
 171      * @delta: the delta to be added to the pronunciation keys.
 172      *
 173      * Add the delta to the pronunciation of the pronunciation keys.
 174      *
 175      */
 176     void increase_pronunciation_possibility(pinyin_option_t options,
 177                                      ChewingKey * keys,
 178                                      gint32 delta);
 179
 180     /**
 181      * PhraseItem::get_phrase_string:
 182      * @phrase: the ucs4 character buffer.
 183      * @returns: whether the get operation is successful.
 184      *
 185      * Get the ucs4 characters of this phrase item.
 186      *
 187      */
 188     bool get_phrase_string(ucs4_t * phrase);
 189
 190     /**
 191      * PhraseItem::set_phrase_string:
 192      * @phrase_length: the ucs4 character length of this phrase item.
 193      * @phrase: the ucs4 character buffer.
 194      * @returns: whether the set operation is successful.
 195      *
 196      * Set the length and ucs4 characters of this phrase item.
 197      *
 198      */
 199     bool set_phrase_string(guint8 phrase_length, ucs4_t * phrase);
 200
 201     /**
 202      * PhraseItem::get_nth_pronunciation:
 203      * @index: the pronunciation index.
 204      * @keys: the pronunciation keys.
 205      * @freq: the frequency of the pronunciation.
 206      * @returns: whether the get operation is successful.
 207      *
 208      * Get the nth pronunciation of this phrase item.
 209      *
 210      */
 211     bool get_nth_pronunciation(size_t index,
 212                                /* out */ ChewingKey * keys,
 213                                /* out */ guint32 & freq);
 214
 215     /**
 216      * PhraseItem::append_pronunciation:
 217      * @keys: the pronunciation keys.
 218      * @freq: the frequency of the pronunciation.
 219      *
 220      * Append one pronunciation.
 221      *
 222      */
 223     void append_pronunciation(ChewingKey * keys, guint32 freq);
 224
 225     /**
 226      * PhraseItem::remove_nth_pronunciation:
 227      * @index: the pronunciation index.
 228      *
 229      * Remove the nth pronunciation.
 230      *
 231      * Note: Normally don't change the first pronunciation,
 232      * which decides the token number.
 233      *
 234      */
 235     void remove_nth_pronunciation(size_t index);
 236
 237     bool operator == (const PhraseItem & rhs) const{
 238         if (m_chunk.size() != rhs.m_chunk.size())
 239             return false;
 240         return memcmp(m_chunk.begin(), rhs.m_chunk.begin(),
 241                       m_chunk.size()) == 0;
 242     }
 243
 244     bool operator != (const PhraseItem & rhs) const{
 245         return ! (*this == rhs);
 246     }
 247 };
 248
 249 /*
 250  *  In Sub Phrase Index, token == (token & PHRASE_MASK).
 251  */
 252
 253 /**
 254  * SubPhraseIndex:
 255  *
 256  * The SubPhraseIndex class for internal usage.
 257  *
 258  */
 259 class SubPhraseIndex{
 260 private:
 261     guint32 m_total_freq;
 262     MemoryChunk m_phrase_index;
 263     MemoryChunk m_phrase_content;
 264     MemoryChunk * m_chunk;
 265
 266     void reset(){
 267         m_total_freq = 0;
 268         m_phrase_index.set_size(0);
 269         m_phrase_content.set_size(0);
 270         if ( m_chunk ){
 271             delete m_chunk;
 272             m_chunk = NULL;
 273         }
 274     }
 275
 276 public:
 277     /**
 278      * SubPhraseIndex::SubPhraseIndex:
 279      *
 280      * The constructor of the SubPhraseIndex.
 281      *
 282      */
 283     SubPhraseIndex():m_total_freq(0){
 284         m_chunk = NULL;
 285     }
 286
 287     /**
 288      * SubPhraseIndex::~SubPhraseIndex:
 289      *
 290      * The destructor of the SubPhraseIndex.
 291      *
 292      */
 293     ~SubPhraseIndex(){
 294         reset();
 295     }
 296
 297     /**
 298      * SubPhraseIndex::load:
 299      * @chunk: the memory chunk of the binary sub phrase index.
 300      * @offset: the begin of binary data in the memory chunk.
 301      * @end: the end of binary data in the memory chunk.
 302      * @returns: whether the load operation is successful.
 303      *
 304      * Load the sub phrase index from the memory chunk.
 305      *
 306      */
 307     bool load(MemoryChunk * chunk,
 308               table_offset_t offset, table_offset_t end);
 309
 310     /**
 311      * SubPhraseIndex::store:
 312      * @new_chunk: the new memory chunk to store this sub phrase index.
 313      * @offset: the begin of binary data in the memory chunk.
 314      * @end: the end of stored binary data in the memory chunk.
 315      * @returns: whether the store operation is successful.
 316      *
 317      * Store the sub phrase index to the new memory chunk.
 318      *
 319      */
 320     bool store(MemoryChunk * new_chunk,
 321                table_offset_t offset, table_offset_t & end);
 322
 323     /**
 324      * SubPhraseIndex::diff:
 325      * @oldone: the original content of sub phrase index.
 326      * @logger: the delta information of user self-learning data.
 327      * @returns: whether the diff operation is successful.
 328      *
 329      * Compare this sub phrase index with the original content of the system
 330      * sub phrase index to generate the logger of difference.
 331      *
 332      * Note: Switch to logger format to reduce user space storage.
 333      *
 334      */
 335     bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger);
 336
 337     /**
 338      * SubPhraseIndex::merge:
 339      * @logger: the logger of difference in user home directory.
 340      * @returns: whether the merge operation is successful.
 341      *
 342      * Merge the user logger of difference with this sub phrase index.
 343      *
 344      */
 345     bool merge(PhraseIndexLogger * logger);
 346
 347     /**
 348      * SubPhraseIndex::get_range:
 349      * @range: the token range.
 350      * @returns: whether the get operation is successful.
 351      *
 352      * Get the token range in this sub phrase index.
 353      *
 354      */
 355     int get_range(/* out */ PhraseIndexRange & range);
 356
 357     /**
 358      * SubPhraseIndex::get_phrase_index_total_freq:
 359      * @returns: the total frequency of this sub phrase index.
 360      *
 361      * Get the total frequency of this sub phrase index.
 362      *
 363      * Note: maybe call it "Zero-gram".
 364      *
 365      */
 366     guint32 get_phrase_index_total_freq();
 367
 368     /**
 369      * SubPhraseIndex::add_unigram_frequency:
 370      * @token: the phrase token.
 371      * @delta: the delta value of the phrase token.
 372      * @returns: the status of the add operation.
 373      *
 374      * Add delta value to the phrase of the token.
 375      *
 376      * Note: this method is a fast path to add delta value.
 377      * Maybe use the get_phrase_item method instead in future.
 378      *
 379      */
 380     int add_unigram_frequency(phrase_token_t token, guint32 delta);
 381
 382     /**
 383      * SubPhraseIndex::get_phrase_item:
 384      * @token: the phrase token.
 385      * @item: the phrase item of the token.
 386      * @returns: the status of the get operation.
 387      *
 388      * Get the phrase item from this sub phrase index.
 389      *
 390      * Note:get_phrase_item function can't modify the phrase item size,
 391      * but can increment the freq of the special pronunciation,
 392      * or change the content without size increasing.
 393      *
 394      */
 395     int get_phrase_item(phrase_token_t token, PhraseItem & item);
 396
 397     /**
 398      * SubPhraseIndex::add_phrase_item:
 399      * @token: the phrase token.
 400      * @item: the phrase item of the token.
 401      * @returns: the status of the add operation.
 402      *
 403      * Add the phrase item to this sub phrase index.
 404      *
 405      */
 406     int add_phrase_item(phrase_token_t token, PhraseItem * item);
 407
 408     /**
 409      * SubPhraseIndex::remove_phrase_item:
 410      * @token: the phrase token.
 411      * @item: the removed phrase item of the token.
 412      * @returns: the status of the remove operation.
 413      *
 414      * Remove the phrase item of the token.
 415      *
 416      * Note: this remove_phrase_item method will substract the unigram
 417      * frequency of the removed item from m_total_freq.
 418      *
 419      */
 420     int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item);
 421
 422     /**
 423      * SubPhraseIndex::mask_out:
 424      * @mask: the mask.
 425      * @value: the value.
 426      * @returns: whether the mask out operation is successful.
 427      *
 428      * Mask out the matched phrase items.
 429      *
 430      */
 431     bool mask_out(phrase_token_t mask, phrase_token_t value);
 432 };
 433
 434 /**
 435  * FacadePhraseIndex:
 436  *
 437  * The facade class of phrase index.
 438  *
 439  */
 440 class FacadePhraseIndex{
 441 private:
 442     guint32 m_total_freq;
 443     SubPhraseIndex * m_sub_phrase_indices[PHRASE_INDEX_LIBRARY_COUNT];
 444 public:
 445     /**
 446      * FacadePhraseIndex::FacadePhraseIndex:
 447      *
 448      * The constructor of the FacadePhraseIndex.
 449      *
 450      */
 451     FacadePhraseIndex(){
 452         m_total_freq = 0;
 453         memset(m_sub_phrase_indices, 0, sizeof(m_sub_phrase_indices));
 454     }
 455
 456     /**
 457      * FacadePhraseIndex::~FacadePhraseIndex:
 458      *
 459      * The destructor of the FacadePhraseIndex.
 460      *
 461      */
 462     ~FacadePhraseIndex(){
 463         for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){
 464             if ( m_sub_phrase_indices[i] ){
 465                 delete m_sub_phrase_indices[i];
 466                 m_sub_phrase_indices[i] = NULL;
 467             }
 468         }
 469     }
 470
 471     /**
 472      * FacadePhraseIndex::load_text:
 473      * @phrase_index: the index of sub phrase index to be loaded.
 474      * @infile: the textual format file of the phrase table.
 475      * @returns: whether the load operation is successful.
 476      *
 477      * Load one sub phrase index from the textual format file.
 478      * Note: load sub phrase index according to the config in future.
 479      *
 480      */
 481     bool load_text(guint8 phrase_index, FILE * infile);
 482
 483     /**
 484      * FacadePhraseIndex::load:
 485      * @phrase_index: the index of sub phrase index to be loaded.
 486      * @chunk: the memory chunk of sub phrase index to be loaded.
 487      * @returns: whether the load operation is successful.
 488      *
 489      * Load one sub phrase index from the memory chunk.
 490      *
 491      */
 492     bool load(guint8 phrase_index, MemoryChunk * chunk);
 493
 494     /**
 495      * FacadePhraseIndex::store:
 496      * @phrase_index: the index of sub phrase index to be stored.
 497      * @new_chunk: the memory chunk of sub phrase index to be stored.
 498      * @returns: whether the store operation is successful.
 499      *
 500      * Store one sub phrase index to the memory chunk.
 501      *
 502      */
 503     bool store(guint8 phrase_index, MemoryChunk * new_chunk);
 504
 505     /**
 506      * FacadePhraseIndex::unload:
 507      * @phrase_index: the index of sub phrase index to be unloaded.
 508      * @returns: whether the unload operation is successful.
 509      *
 510      * Unload one sub phrase index.
 511      *
 512      */
 513     bool unload(guint8 phrase_index);
 514
 515
 516     /**
 517      * FacadePhraseIndex::diff:
 518      * @phrase_index: the index of sub phrase index to be differed.
 519      * @oldchunk: the original content of sub phrase index.
 520      * @newlog: the delta information of user self-learning data.
 521      * @returns: whether the diff operation is successful.
 522      *
 523      * Store user delta information in the logger format.
 524      *
 525      * Note: the ownership of oldchunk is transfered here.
 526      *
 527      */
 528     bool diff(guint8 phrase_index, MemoryChunk * oldchunk,
 529               MemoryChunk * newlog);
 530
 531     /**
 532      * FacadePhraseIndex::merge:
 533      * @phrase_index: the index of sub phrase index to be merged.
 534      * @log: the logger of difference in user home directory.
 535      * @returns: whether the merge operation is successful.
 536      *
 537      * Merge the user logger of difference with the sub phrase index.
 538      *
 539      * Note: the ownership of log is transfered here.
 540      *
 541      */
 542     bool merge(guint8 phrase_index, MemoryChunk * log);
 543
 544     /**
 545      * FacadePhraseIndex::merge_with_mask:
 546      * @phrase_index: the index of sub phrase index to be merged.
 547      * @log: the logger of difference in user home directory.
 548      * @mask: the mask.
 549      * @value: the value.
 550      * @returns: whether the merge operation is successful.
 551      *
 552      * Merge the user logger of difference with mask operation.
 553      *
 554      * Note: the ownership of log is transfered here.
 555      *
 556      */
 557     bool merge_with_mask(guint8 phrase_index, MemoryChunk * log,
 558                          phrase_token_t mask, phrase_token_t value);
 559
 560     /**
 561      * FacadePhraseIndex::compact:
 562      * @returns: whether the compact operation is successful.
 563      *
 564      * Compat all sub phrase index memory usage.
 565      *
 566      */
 567     bool compact();
 568
 569     /**
 570      * FacadePhraseIndex::mask_out:
 571      * @phrase_index: the index of sub phrase index.
 572      * @mask: the mask.
 573      * @value: the value.
 574      * @returns: whether the mask out operation is successful.
 575      *
 576      * Mask out the matched phrase items.
 577      *
 578      * Note: should call compact() after the mask out operation.
 579      *
 580      */
 581     bool mask_out(guint8 phrase_index,
 582                   phrase_token_t mask, phrase_token_t value);
 583
 584     /**
 585      * FacadePhraseIndex::get_sub_phrase_range:
 586      * @min_index: the minimal sub phrase index.
 587      * @max_index: the maximal sub phrase index.
 588      * @returns: the status of the get operation.
 589      *
 590      * Get the minimum and maximum of the sub phrase index.
 591      *
 592      */
 593     int get_sub_phrase_range(guint8 & min_index, guint8 & max_index);
 594
 595     /**
 596      * FacadePhraseIndex::get_range:
 597      * @phrase_index: the index of sub phrase index.
 598      * @range: the token range of the sub phrase index.
 599      * @returns: the status of the get operation.
 600      *
 601      * Get the token range of the sub phrase index.
 602      *
 603      */
 604     int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range);
 605
 606     /**
 607      * FacadePhraseIndex::get_phrase_index_total_freq:
 608      * @returns: the total freq of the facade phrase index.
 609      *
 610      * Get the total freq of the facade phrase index.
 611      *
 612      * Note: maybe call it "Zero-gram".
 613      *
 614      */
 615     guint32 get_phrase_index_total_freq(){
 616         return m_total_freq;
 617     }
 618
 619     /**
 620      * FacadePhraseIndex::add_unigram_frequency:
 621      * @token: the phrase token.
 622      * @delta: the delta value of the phrase token.
 623      * @returns: the status of the add operation.
 624      *
 625      * Add delta value to the phrase of the token.
 626      *
 627      */
 628     int add_unigram_frequency(phrase_token_t token, guint32 delta){
 629         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 630         SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
 631         if ( !sub_phrase )
 632             return ERROR_NO_SUB_PHRASE_INDEX;
 633         m_total_freq += delta;
 634         return sub_phrase->add_unigram_frequency(token, delta);
 635     }
 636
 637     /**
 638      * FacadePhraseIndex::get_phrase_item:
 639      * @token: the phrase token.
 640      * @item: the phrase item of the token.
 641      * @returns: the status of the get operation.
 642      *
 643      * Get the phrase item from the facade phrase index.
 644      *
 645      */
 646     int get_phrase_item(phrase_token_t token, PhraseItem & item){
 647         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 648         SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
 649         if ( !sub_phrase )
 650             return ERROR_NO_SUB_PHRASE_INDEX;
 651         return sub_phrase->get_phrase_item(token, item);
 652     }
 653
 654     /**
 655      * FacadePhraseIndex::add_phrase_item:
 656      * @token: the phrase token.
 657      * @item: the phrase item of the token.
 658      * @returns: the status of the add operation.
 659      *
 660      * Add the phrase item to the facade phrase index.
 661      *
 662      */
 663     int add_phrase_item(phrase_token_t token, PhraseItem * item){
 664         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 665         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
 666         if ( !sub_phrase ){
 667             sub_phrase = new SubPhraseIndex;
 668         }
 669         m_total_freq += item->get_unigram_frequency();
 670         return sub_phrase->add_phrase_item(token, item);
 671     }
 672
 673     /**
 674      * FacadePhraseIndex::remove_phrase_item:
 675      * @token: the phrase token.
 676      * @item: the removed phrase item of the token.
 677      * @returns: the status of the remove operation.
 678      *
 679      * Remove the phrase item of the token.
 680      *
 681      */
 682     int remove_phrase_item(phrase_token_t token, PhraseItem * & item){
 683         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 684         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
 685         if ( !sub_phrase ){
 686             return ERROR_NO_SUB_PHRASE_INDEX;
 687         }
 688         int result = sub_phrase->remove_phrase_item(token, item);
 689         if ( result )
 690             return result;
 691         m_total_freq -= item->get_unigram_frequency();
 692         return result;
 693     }
 694
 695     /**
 696      * FacadePhraseIndex::prepare_ranges:
 697      * @ranges: the ranges to be prepared.
 698      * @returns: whether the prepare operation is successful.
 699      *
 700      * Prepare the ranges.
 701      *
 702      */
 703     bool prepare_ranges(PhraseIndexRanges ranges) {
 704         /* assume memset(ranges, 0, sizeof(ranges)); */
 705         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 706             GArray * & range = ranges[i];
 707             assert(NULL == range);
 708
 709             SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i];
 710             if (sub_phrase) {
 711                 range = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange));
 712             }
 713         }
 714         return true;
 715     }
 716
 717     /**
 718      * FacadePhraseIndex::clear_ranges:
 719      * @ranges: the ranges to be cleared.
 720      * @returns: whether the clear operation is successful.
 721      *
 722      * Clear the ranges.
 723      *
 724      */
 725     bool clear_ranges(PhraseIndexRanges ranges) {
 726         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 727             GArray * range = ranges[i];
 728             if (range) {
 729                 g_array_set_size(range, 0);
 730             }
 731         }
 732         return true;
 733     }
 734
 735     /**
 736      * FacadePhraseIndex::destroy_ranges:
 737      * @ranges: the ranges to be destroyed.
 738      * @returns: whether the destroy operation is successful.
 739      *
 740      * Destroy the ranges.
 741      *
 742      */
 743     bool destroy_ranges(PhraseIndexRanges ranges) {
 744         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 745             GArray * & range = ranges[i];
 746             if (range) {
 747                 g_array_free(range, TRUE);
 748                 range = NULL;
 749             }
 750         }
 751         return true;
 752     }
 753
 754     /**
 755      * FacadePhraseIndex::prepare_tokens:
 756      * @tokens: the tokens to be prepared.
 757      * @returns: whether the prepare operation is successful.
 758      *
 759      * Prepare the tokens.
 760      *
 761      */
 762     bool prepare_tokens(PhraseTokens tokens) {
 763         /* assume memset(tokens, 0, sizeof(tokens)); */
 764         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 765             GArray * & token = tokens[i];
 766             assert(NULL == token);
 767
 768             SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i];
 769             if (sub_phrase) {
 770                 token = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
 771             }
 772         }
 773         return true;
 774     }
 775
 776     /**
 777      * FacadePhraseIndex::clear_tokens:
 778      * @tokens: the tokens to be cleared.
 779      * @return: whether the clear operation is successful.
 780      *
 781      * Clear the tokens.
 782      *
 783      */
 784     bool clear_tokens(PhraseTokens tokens) {
 785         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 786             GArray * token = tokens[i];
 787             if (token) {
 788                 g_array_set_size(token, 0);
 789             }
 790         }
 791         return true;
 792     }
 793
 794     /**
 795      * FacadePhraseIndex::destroy_tokens:
 796      * @tokens: the tokens to be destroyed.
 797      * @returns: whether the destroy operation is successful.
 798      *
 799      * Destroy the tokens.
 800      *
 801      */
 802     bool destroy_tokens(PhraseTokens tokens) {
 803         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 804             GArray * & token = tokens[i];
 805             if (token) {
 806                 g_array_free(token, TRUE);
 807                 token = NULL;
 808             }
 809         }
 810         return true;
 811     }
 812
 813     /**
 814      * FacadePhraseIndex::create_sub_phrase:
 815      * @index: the phrase index to be created.
 816      * @returns: the result of the create operation.
 817      *
 818      * Create the sub phrase index.
 819      *
 820      */
 821     int create_sub_phrase(guint8 index) {
 822         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
 823         if (sub_phrase) {
 824             return ERROR_ALREADY_EXISTS;
 825         }
 826
 827         sub_phrase = new SubPhraseIndex;
 828
 829         return ERROR_OK;
 830     }
 831 };
 832
 833 typedef enum {
 834     NOT_USED,                /* not used. */
 835     SYSTEM_FILE,             /* system phrase file. */
 836     DICTIONARY,              /* professional dictionary. */
 837     USER_FILE,               /* user only phrase file. */
 838 } PHRASE_FILE_TYPE;
 839
 840 typedef struct {
 841     const PHRASE_INDEX_LIBRARIES m_dict_index; /* for assert purpose. */
 842     const char * m_table_filename;
 843     const char * m_system_filename;
 844     const char * m_user_filename;
 845     PHRASE_FILE_TYPE m_file_type;
 846 } pinyin_table_info_t;
 847
 848 extern const pinyin_table_info_t pinyin_phrase_files[PHRASE_INDEX_LIBRARY_COUNT];
 849
 850 PhraseIndexLogger * mask_out_phrase_index_logger
 851 (PhraseIndexLogger * oldlogger, phrase_token_t mask, phrase_token_t value);
 852
 853 };
 854
 855 #endif