src/storage/phrase_index.h

   1 /*
   2  *  libpinyin
   3  *  Library to deal with pinyin.
   4  *
   5  *  Copyright (C) 2006-2007 Peng Wu
   6  *
   7  *  This program is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  This program is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with this program; if not, write to the Free Software
  19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  20  */
  21
  22 #ifndef PHRASE_INDEX_H
  23 #define PHRASE_INDEX_H
  24
  25 #include <stdio.h>
  26 #include <glib.h>
  27 #include "novel_types.h"
  28 #include "chewing_key.h"
  29 #include "pinyin_parser2.h"
  30 #include "pinyin_phrase2.h"
  31 #include "memory_chunk.h"
  32 #include "phrase_index_logger.h"
  33
  34 /**
  35  * Phrase Index File Format
  36  *
  37  * Indirect Index: Index by Token
  38  * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  39  * + Phrase Offset + Phrase Offset + Phrase Offset + ......  +
  40  * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  41  * Phrase Content:
  42  * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  43  * + Phrase Length + number of  Pronunciations  + Uni-gram Frequency+
  44  * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  45  * + Phrase String(UCS4) + n Pronunciations with Frequency +
  46  * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  47  */
  48
  49 namespace pinyin{
  50
  51 /* Store delta info by phrase index logger in user home directory.
  52  */
  53
  54 const size_t phrase_item_header = sizeof(guint8) + sizeof(guint8) + sizeof(guint32);
  55
  56 /**
  57  * PhraseItem:
  58  *
  59  * The PhraseItem to access the items in phrase index.
  60  *
  61  */
  62 class PhraseItem{
  63     friend class SubPhraseIndex;
  64     friend bool _compute_new_header(PhraseIndexLogger * logger,
  65                                     phrase_token_t mask,
  66                                     phrase_token_t value,
  67                                     guint32 & new_total_freq);
  68
  69 private:
  70     MemoryChunk m_chunk;
  71     bool set_n_pronunciation(guint8 n_prouns);
  72 public:
  73     /**
  74      * PhraseItem::PhraseItem:
  75      *
  76      * The constructor of the PhraseItem.
  77      *
  78      */
  79     PhraseItem(){
  80         m_chunk.set_size(phrase_item_header);
  81         memset(m_chunk.begin(), 0, m_chunk.size());
  82     }
  83
  84 #if 0
  85     PhraseItem(MemoryChunk & chunk){
  86         m_chunk.set_content(0, chunk->begin(), chunk->size());
  87         assert ( m_chunk.size() >= phrase_item_header);
  88     }
  89 #endif
  90
  91     /**
  92      * PhraseItem::get_phrase_length:
  93      * @returns: the length of this phrase item.
  94      *
  95      * Get the length of this phrase item.
  96      *
  97      */
  98     guint8 get_phrase_length(){
  99         char * buf_begin = (char *)m_chunk.begin();
 100         return (*(guint8 *)buf_begin);
 101     }
 102
 103     /**
 104      * PhraseItem::get_n_pronunciation:
 105      * @returns: the number of the pronunciations.
 106      *
 107      * Get the number of the pronunciations.
 108      *
 109      */
 110     guint8 get_n_pronunciation(){
 111         char * buf_begin = ( char *) m_chunk.begin();
 112         return (*(guint8 *)(buf_begin + sizeof(guint8)));
 113     }
 114
 115     /**
 116      * PhraseItem::get_unigram_frequency:
 117      * @returns: the uni-gram frequency of this phrase item.
 118      *
 119      * Get the uni-gram frequency of this phrase item.
 120      *
 121      */
 122     guint32 get_unigram_frequency(){
 123         char * buf_begin = (char *)m_chunk.begin();
 124         return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8)));
 125     }
 126
 127     /**
 128      * PhraseItem::get_pronunciation_possibility:
 129      * @options: the pinyin options.
 130      * @keys: the pronunciation keys.
 131      * @returns: the possibility of this phrase item pronounces the pinyin.
 132      *
 133      * Get the possibility of this phrase item pronounces the pinyin.
 134      *
 135      */
 136     gfloat get_pronunciation_possibility(pinyin_option_t options,
 137                                          ChewingKey * keys){
 138         guint8 phrase_length = get_phrase_length();
 139         guint8 npron = get_n_pronunciation();
 140         size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t);
 141         char * buf_begin = (char *)m_chunk.begin();
 142         guint32 matched = 0, total_freq =0;
 143         for ( int i = 0 ; i < npron ; ++i){
 144             char * chewing_begin = buf_begin + offset +
 145                 i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
 146             guint32 * freq = (guint32 *)(chewing_begin +
 147                                          phrase_length * sizeof(ChewingKey));
 148             total_freq += *freq;
 149             if ( 0 == pinyin_compare_with_ambiguities2
 150                  (options,  keys,
 151                   (ChewingKey *)chewing_begin,phrase_length) ){
 152                 matched += *freq;
 153             }
 154         }
 155
 156 #if 1
 157         /* an additional safe guard for chewing. */
 158         if ( 0 == total_freq )
 159             return 0;
 160 #endif
 161
 162         /* used preprocessor to avoid zero freq, in gen_chewing_table. */
 163         gfloat retval = matched / (gfloat) total_freq;
 164         return retval;
 165     }
 166
 167     /**
 168      * PhraseItem::increase_pronunciation_possibility:
 169      * @options: the pinyin options.
 170      * @keys: the pronunciation keys.
 171      * @delta: the delta to be added to the pronunciation keys.
 172      *
 173      * Add the delta to the pronunciation of the pronunciation keys.
 174      *
 175      */
 176     void increase_pronunciation_possibility(pinyin_option_t options,
 177                                      ChewingKey * keys,
 178                                      gint32 delta);
 179
 180     /**
 181      * PhraseItem::get_phrase_string:
 182      * @phrase: the ucs4 character buffer.
 183      * @returns: whether the get operation is successful.
 184      *
 185      * Get the ucs4 characters of this phrase item.
 186      *
 187      */
 188     bool get_phrase_string(ucs4_t * phrase);
 189
 190     /**
 191      * PhraseItem::set_phrase_string:
 192      * @phrase_length: the ucs4 character length of this phrase item.
 193      * @phrase: the ucs4 character buffer.
 194      * @returns: whether the set operation is successful.
 195      *
 196      * Set the length and ucs4 characters of this phrase item.
 197      *
 198      */
 199     bool set_phrase_string(guint8 phrase_length, ucs4_t * phrase);
 200
 201     /**
 202      * PhraseItem::get_nth_pronunciation:
 203      * @index: the pronunciation index.
 204      * @keys: the pronunciation keys.
 205      * @freq: the frequency of the pronunciation.
 206      * @returns: whether the get operation is successful.
 207      *
 208      * Get the nth pronunciation of this phrase item.
 209      *
 210      */
 211     bool get_nth_pronunciation(size_t index,
 212                                /* out */ ChewingKey * keys,
 213                                /* out */ guint32 & freq);
 214
 215     /**
 216      * PhraseItem::add_pronunciation:
 217      * @keys: the pronunciation keys.
 218      * @delta: the delta of the frequency of the pronunciation.
 219      * @returns: whether the add operation is successful.
 220      *
 221      * Add one pronunciation.
 222      *
 223      */
 224     bool add_pronunciation(ChewingKey * keys, guint32 delta);
 225
 226     /**
 227      * PhraseItem::remove_nth_pronunciation:
 228      * @index: the pronunciation index.
 229      *
 230      * Remove the nth pronunciation.
 231      *
 232      * Note: Normally don't change the first pronunciation,
 233      * which decides the token number.
 234      *
 235      */
 236     void remove_nth_pronunciation(size_t index);
 237
 238     bool operator == (const PhraseItem & rhs) const{
 239         if (m_chunk.size() != rhs.m_chunk.size())
 240             return false;
 241         return memcmp(m_chunk.begin(), rhs.m_chunk.begin(),
 242                       m_chunk.size()) == 0;
 243     }
 244
 245     bool operator != (const PhraseItem & rhs) const{
 246         return ! (*this == rhs);
 247     }
 248 };
 249
 250 /*
 251  *  In Sub Phrase Index, token == (token & PHRASE_MASK).
 252  */
 253
 254 /**
 255  * SubPhraseIndex:
 256  *
 257  * The SubPhraseIndex class for internal usage.
 258  *
 259  */
 260 class SubPhraseIndex{
 261 private:
 262     guint32 m_total_freq;
 263     MemoryChunk m_phrase_index;
 264     MemoryChunk m_phrase_content;
 265     MemoryChunk * m_chunk;
 266
 267     void reset(){
 268         m_total_freq = 0;
 269         m_phrase_index.set_size(0);
 270         m_phrase_content.set_size(0);
 271         if ( m_chunk ){
 272             delete m_chunk;
 273             m_chunk = NULL;
 274         }
 275     }
 276
 277 public:
 278     /**
 279      * SubPhraseIndex::SubPhraseIndex:
 280      *
 281      * The constructor of the SubPhraseIndex.
 282      *
 283      */
 284     SubPhraseIndex():m_total_freq(0){
 285         m_chunk = NULL;
 286     }
 287
 288     /**
 289      * SubPhraseIndex::~SubPhraseIndex:
 290      *
 291      * The destructor of the SubPhraseIndex.
 292      *
 293      */
 294     ~SubPhraseIndex(){
 295         reset();
 296     }
 297
 298     /**
 299      * SubPhraseIndex::load:
 300      * @chunk: the memory chunk of the binary sub phrase index.
 301      * @offset: the begin of binary data in the memory chunk.
 302      * @end: the end of binary data in the memory chunk.
 303      * @returns: whether the load operation is successful.
 304      *
 305      * Load the sub phrase index from the memory chunk.
 306      *
 307      */
 308     bool load(MemoryChunk * chunk,
 309               table_offset_t offset, table_offset_t end);
 310
 311     /**
 312      * SubPhraseIndex::store:
 313      * @new_chunk: the new memory chunk to store this sub phrase index.
 314      * @offset: the begin of binary data in the memory chunk.
 315      * @end: the end of stored binary data in the memory chunk.
 316      * @returns: whether the store operation is successful.
 317      *
 318      * Store the sub phrase index to the new memory chunk.
 319      *
 320      */
 321     bool store(MemoryChunk * new_chunk,
 322                table_offset_t offset, table_offset_t & end);
 323
 324     /**
 325      * SubPhraseIndex::diff:
 326      * @oldone: the original content of sub phrase index.
 327      * @logger: the delta information of user self-learning data.
 328      * @returns: whether the diff operation is successful.
 329      *
 330      * Compare this sub phrase index with the original content of the system
 331      * sub phrase index to generate the logger of difference.
 332      *
 333      * Note: Switch to logger format to reduce user space storage.
 334      *
 335      */
 336     bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger);
 337
 338     /**
 339      * SubPhraseIndex::merge:
 340      * @logger: the logger of difference in user home directory.
 341      * @returns: whether the merge operation is successful.
 342      *
 343      * Merge the user logger of difference with this sub phrase index.
 344      *
 345      */
 346     bool merge(PhraseIndexLogger * logger);
 347
 348     /**
 349      * SubPhraseIndex::get_range:
 350      * @range: the token range.
 351      * @returns: whether the get operation is successful.
 352      *
 353      * Get the token range in this sub phrase index.
 354      *
 355      */
 356     int get_range(/* out */ PhraseIndexRange & range);
 357
 358     /**
 359      * SubPhraseIndex::get_phrase_index_total_freq:
 360      * @returns: the total frequency of this sub phrase index.
 361      *
 362      * Get the total frequency of this sub phrase index.
 363      *
 364      * Note: maybe call it "Zero-gram".
 365      *
 366      */
 367     guint32 get_phrase_index_total_freq();
 368
 369     /**
 370      * SubPhraseIndex::add_unigram_frequency:
 371      * @token: the phrase token.
 372      * @delta: the delta value of the phrase token.
 373      * @returns: the status of the add operation.
 374      *
 375      * Add delta value to the phrase of the token.
 376      *
 377      * Note: this method is a fast path to add delta value.
 378      * Maybe use the get_phrase_item method instead in future.
 379      *
 380      */
 381     int add_unigram_frequency(phrase_token_t token, guint32 delta);
 382
 383     /**
 384      * SubPhraseIndex::get_phrase_item:
 385      * @token: the phrase token.
 386      * @item: the phrase item of the token.
 387      * @returns: the status of the get operation.
 388      *
 389      * Get the phrase item from this sub phrase index.
 390      *
 391      * Note:get_phrase_item function can't modify the phrase item size,
 392      * but can increment the freq of the special pronunciation,
 393      * or change the content without size increasing.
 394      *
 395      */
 396     int get_phrase_item(phrase_token_t token, PhraseItem & item);
 397
 398     /**
 399      * SubPhraseIndex::add_phrase_item:
 400      * @token: the phrase token.
 401      * @item: the phrase item of the token.
 402      * @returns: the status of the add operation.
 403      *
 404      * Add the phrase item to this sub phrase index.
 405      *
 406      */
 407     int add_phrase_item(phrase_token_t token, PhraseItem * item);
 408
 409     /**
 410      * SubPhraseIndex::remove_phrase_item:
 411      * @token: the phrase token.
 412      * @item: the removed phrase item of the token.
 413      * @returns: the status of the remove operation.
 414      *
 415      * Remove the phrase item of the token.
 416      *
 417      * Note: this remove_phrase_item method will substract the unigram
 418      * frequency of the removed item from m_total_freq.
 419      *
 420      */
 421     int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item);
 422
 423     /**
 424      * SubPhraseIndex::mask_out:
 425      * @mask: the mask.
 426      * @value: the value.
 427      * @returns: whether the mask out operation is successful.
 428      *
 429      * Mask out the matched phrase items.
 430      *
 431      */
 432     bool mask_out(phrase_token_t mask, phrase_token_t value);
 433 };
 434
 435 /**
 436  * FacadePhraseIndex:
 437  *
 438  * The facade class of phrase index.
 439  *
 440  */
 441 class FacadePhraseIndex{
 442 private:
 443     guint32 m_total_freq;
 444     SubPhraseIndex * m_sub_phrase_indices[PHRASE_INDEX_LIBRARY_COUNT];
 445 public:
 446     /**
 447      * FacadePhraseIndex::FacadePhraseIndex:
 448      *
 449      * The constructor of the FacadePhraseIndex.
 450      *
 451      */
 452     FacadePhraseIndex(){
 453         m_total_freq = 0;
 454         memset(m_sub_phrase_indices, 0, sizeof(m_sub_phrase_indices));
 455     }
 456
 457     /**
 458      * FacadePhraseIndex::~FacadePhraseIndex:
 459      *
 460      * The destructor of the FacadePhraseIndex.
 461      *
 462      */
 463     ~FacadePhraseIndex(){
 464         for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){
 465             if ( m_sub_phrase_indices[i] ){
 466                 delete m_sub_phrase_indices[i];
 467                 m_sub_phrase_indices[i] = NULL;
 468             }
 469         }
 470     }
 471
 472     /**
 473      * FacadePhraseIndex::load_text:
 474      * @phrase_index: the index of sub phrase index to be loaded.
 475      * @infile: the textual format file of the phrase table.
 476      * @returns: whether the load operation is successful.
 477      *
 478      * Load one sub phrase index from the textual format file.
 479      * Note: load sub phrase index according to the config in future.
 480      *
 481      */
 482     bool load_text(guint8 phrase_index, FILE * infile);
 483
 484     /**
 485      * FacadePhraseIndex::load:
 486      * @phrase_index: the index of sub phrase index to be loaded.
 487      * @chunk: the memory chunk of sub phrase index to be loaded.
 488      * @returns: whether the load operation is successful.
 489      *
 490      * Load one sub phrase index from the memory chunk.
 491      *
 492      */
 493     bool load(guint8 phrase_index, MemoryChunk * chunk);
 494
 495     /**
 496      * FacadePhraseIndex::store:
 497      * @phrase_index: the index of sub phrase index to be stored.
 498      * @new_chunk: the memory chunk of sub phrase index to be stored.
 499      * @returns: whether the store operation is successful.
 500      *
 501      * Store one sub phrase index to the memory chunk.
 502      *
 503      */
 504     bool store(guint8 phrase_index, MemoryChunk * new_chunk);
 505
 506     /**
 507      * FacadePhraseIndex::unload:
 508      * @phrase_index: the index of sub phrase index to be unloaded.
 509      * @returns: whether the unload operation is successful.
 510      *
 511      * Unload one sub phrase index.
 512      *
 513      */
 514     bool unload(guint8 phrase_index);
 515
 516
 517     /**
 518      * FacadePhraseIndex::diff:
 519      * @phrase_index: the index of sub phrase index to be differed.
 520      * @oldchunk: the original content of sub phrase index.
 521      * @newlog: the delta information of user self-learning data.
 522      * @returns: whether the diff operation is successful.
 523      *
 524      * Store user delta information in the logger format.
 525      *
 526      * Note: the ownership of oldchunk is transfered here.
 527      *
 528      */
 529     bool diff(guint8 phrase_index, MemoryChunk * oldchunk,
 530               MemoryChunk * newlog);
 531
 532     /**
 533      * FacadePhraseIndex::merge:
 534      * @phrase_index: the index of sub phrase index to be merged.
 535      * @log: the logger of difference in user home directory.
 536      * @returns: whether the merge operation is successful.
 537      *
 538      * Merge the user logger of difference with the sub phrase index.
 539      *
 540      * Note: the ownership of log is transfered here.
 541      *
 542      */
 543     bool merge(guint8 phrase_index, MemoryChunk * log);
 544
 545     /**
 546      * FacadePhraseIndex::merge_with_mask:
 547      * @phrase_index: the index of sub phrase index to be merged.
 548      * @log: the logger of difference in user home directory.
 549      * @mask: the mask.
 550      * @value: the value.
 551      * @returns: whether the merge operation is successful.
 552      *
 553      * Merge the user logger of difference with mask operation.
 554      *
 555      * Note: the ownership of log is transfered here.
 556      *
 557      */
 558     bool merge_with_mask(guint8 phrase_index, MemoryChunk * log,
 559                          phrase_token_t mask, phrase_token_t value);
 560
 561     /**
 562      * FacadePhraseIndex::compact:
 563      * @returns: whether the compact operation is successful.
 564      *
 565      * Compat all sub phrase index memory usage.
 566      *
 567      */
 568     bool compact();
 569
 570     /**
 571      * FacadePhraseIndex::mask_out:
 572      * @phrase_index: the index of sub phrase index.
 573      * @mask: the mask.
 574      * @value: the value.
 575      * @returns: whether the mask out operation is successful.
 576      *
 577      * Mask out the matched phrase items.
 578      *
 579      * Note: should call compact() after the mask out operation.
 580      *
 581      */
 582     bool mask_out(guint8 phrase_index,
 583                   phrase_token_t mask, phrase_token_t value);
 584
 585     /**
 586      * FacadePhraseIndex::get_sub_phrase_range:
 587      * @min_index: the minimal sub phrase index.
 588      * @max_index: the maximal sub phrase index.
 589      * @returns: the status of the get operation.
 590      *
 591      * Get the minimum and maximum of the sub phrase index.
 592      *
 593      */
 594     int get_sub_phrase_range(guint8 & min_index, guint8 & max_index);
 595
 596     /**
 597      * FacadePhraseIndex::get_range:
 598      * @phrase_index: the index of sub phrase index.
 599      * @range: the token range of the sub phrase index.
 600      * @returns: the status of the get operation.
 601      *
 602      * Get the token range of the sub phrase index.
 603      *
 604      */
 605     int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range);
 606
 607     /**
 608      * FacadePhraseIndex::get_phrase_index_total_freq:
 609      * @returns: the total freq of the facade phrase index.
 610      *
 611      * Get the total freq of the facade phrase index.
 612      *
 613      * Note: maybe call it "Zero-gram".
 614      *
 615      */
 616     guint32 get_phrase_index_total_freq(){
 617         return m_total_freq;
 618     }
 619
 620     /**
 621      * FacadePhraseIndex::add_unigram_frequency:
 622      * @token: the phrase token.
 623      * @delta: the delta value of the phrase token.
 624      * @returns: the status of the add operation.
 625      *
 626      * Add delta value to the phrase of the token.
 627      *
 628      */
 629     int add_unigram_frequency(phrase_token_t token, guint32 delta){
 630         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 631         SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
 632         if ( !sub_phrase )
 633             return ERROR_NO_SUB_PHRASE_INDEX;
 634         m_total_freq += delta;
 635         return sub_phrase->add_unigram_frequency(token, delta);
 636     }
 637
 638     /**
 639      * FacadePhraseIndex::get_phrase_item:
 640      * @token: the phrase token.
 641      * @item: the phrase item of the token.
 642      * @returns: the status of the get operation.
 643      *
 644      * Get the phrase item from the facade phrase index.
 645      *
 646      */
 647     int get_phrase_item(phrase_token_t token, PhraseItem & item){
 648         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 649         SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
 650         if ( !sub_phrase )
 651             return ERROR_NO_SUB_PHRASE_INDEX;
 652         return sub_phrase->get_phrase_item(token, item);
 653     }
 654
 655     /**
 656      * FacadePhraseIndex::add_phrase_item:
 657      * @token: the phrase token.
 658      * @item: the phrase item of the token.
 659      * @returns: the status of the add operation.
 660      *
 661      * Add the phrase item to the facade phrase index.
 662      *
 663      */
 664     int add_phrase_item(phrase_token_t token, PhraseItem * item){
 665         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 666         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
 667         if ( !sub_phrase ){
 668             sub_phrase = new SubPhraseIndex;
 669         }
 670         m_total_freq += item->get_unigram_frequency();
 671         return sub_phrase->add_phrase_item(token, item);
 672     }
 673
 674     /**
 675      * FacadePhraseIndex::remove_phrase_item:
 676      * @token: the phrase token.
 677      * @item: the removed phrase item of the token.
 678      * @returns: the status of the remove operation.
 679      *
 680      * Remove the phrase item of the token.
 681      *
 682      */
 683     int remove_phrase_item(phrase_token_t token, PhraseItem * & item){
 684         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 685         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
 686         if ( !sub_phrase ){
 687             return ERROR_NO_SUB_PHRASE_INDEX;
 688         }
 689         int result = sub_phrase->remove_phrase_item(token, item);
 690         if ( result )
 691             return result;
 692         m_total_freq -= item->get_unigram_frequency();
 693         return result;
 694     }
 695
 696     /**
 697      * FacadePhraseIndex::prepare_ranges:
 698      * @ranges: the ranges to be prepared.
 699      * @returns: whether the prepare operation is successful.
 700      *
 701      * Prepare the ranges.
 702      *
 703      */
 704     bool prepare_ranges(PhraseIndexRanges ranges) {
 705         /* assume memset(ranges, 0, sizeof(ranges)); */
 706         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 707             GArray * & range = ranges[i];
 708             assert(NULL == range);
 709
 710             SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i];
 711             if (sub_phrase) {
 712                 range = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange));
 713             }
 714         }
 715         return true;
 716     }
 717
 718     /**
 719      * FacadePhraseIndex::clear_ranges:
 720      * @ranges: the ranges to be cleared.
 721      * @returns: whether the clear operation is successful.
 722      *
 723      * Clear the ranges.
 724      *
 725      */
 726     bool clear_ranges(PhraseIndexRanges ranges) {
 727         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 728             GArray * range = ranges[i];
 729             if (range) {
 730                 g_array_set_size(range, 0);
 731             }
 732         }
 733         return true;
 734     }
 735
 736     /**
 737      * FacadePhraseIndex::destroy_ranges:
 738      * @ranges: the ranges to be destroyed.
 739      * @returns: whether the destroy operation is successful.
 740      *
 741      * Destroy the ranges.
 742      *
 743      */
 744     bool destroy_ranges(PhraseIndexRanges ranges) {
 745         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 746             GArray * & range = ranges[i];
 747             if (range) {
 748                 g_array_free(range, TRUE);
 749                 range = NULL;
 750             }
 751         }
 752         return true;
 753     }
 754
 755     /**
 756      * FacadePhraseIndex::prepare_tokens:
 757      * @tokens: the tokens to be prepared.
 758      * @returns: whether the prepare operation is successful.
 759      *
 760      * Prepare the tokens.
 761      *
 762      */
 763     bool prepare_tokens(PhraseTokens tokens) {
 764         /* assume memset(tokens, 0, sizeof(tokens)); */
 765         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 766             GArray * & token = tokens[i];
 767             assert(NULL == token);
 768
 769             SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i];
 770             if (sub_phrase) {
 771                 token = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
 772             }
 773         }
 774         return true;
 775     }
 776
 777     /**
 778      * FacadePhraseIndex::clear_tokens:
 779      * @tokens: the tokens to be cleared.
 780      * @return: whether the clear operation is successful.
 781      *
 782      * Clear the tokens.
 783      *
 784      */
 785     bool clear_tokens(PhraseTokens tokens) {
 786         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 787             GArray * token = tokens[i];
 788             if (token) {
 789                 g_array_set_size(token, 0);
 790             }
 791         }
 792         return true;
 793     }
 794
 795     /**
 796      * FacadePhraseIndex::destroy_tokens:
 797      * @tokens: the tokens to be destroyed.
 798      * @returns: whether the destroy operation is successful.
 799      *
 800      * Destroy the tokens.
 801      *
 802      */
 803     bool destroy_tokens(PhraseTokens tokens) {
 804         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 805             GArray * & token = tokens[i];
 806             if (token) {
 807                 g_array_free(token, TRUE);
 808                 token = NULL;
 809             }
 810         }
 811         return true;
 812     }
 813
 814     /**
 815      * FacadePhraseIndex::create_sub_phrase:
 816      * @index: the phrase index to be created.
 817      * @returns: the result of the create operation.
 818      *
 819      * Create the sub phrase index.
 820      *
 821      */
 822     int create_sub_phrase(guint8 index) {
 823         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
 824         if (sub_phrase) {
 825             return ERROR_ALREADY_EXISTS;
 826         }
 827
 828         sub_phrase = new SubPhraseIndex;
 829
 830         return ERROR_OK;
 831     }
 832 };
 833
 834 typedef enum {
 835     NOT_USED,                /* not used. */
 836     SYSTEM_FILE,             /* system phrase file. */
 837     DICTIONARY,              /* professional dictionary. */
 838     USER_FILE,               /* user only phrase file. */
 839 } PHRASE_FILE_TYPE;
 840
 841 typedef struct {
 842     const PHRASE_INDEX_LIBRARIES m_dict_index; /* for assert purpose. */
 843     const char * m_table_filename;
 844     const char * m_system_filename;
 845     const char * m_user_filename;
 846     PHRASE_FILE_TYPE m_file_type;
 847 } pinyin_table_info_t;
 848
 849 extern const pinyin_table_info_t pinyin_phrase_files[PHRASE_INDEX_LIBRARY_COUNT];
 850
 851 PhraseIndexLogger * mask_out_phrase_index_logger
 852 (PhraseIndexLogger * oldlogger, phrase_token_t mask, phrase_token_t value);
 853
 854 };
 855
 856 #endif