src/storage/phrase_index.h

   1 /*
   2  *  libpinyin
   3  *  Library to deal with pinyin.
   4  *
   5  *  Copyright (C) 2006-2007 Peng Wu
   6  *
   7  *  This program is free software; you can redistribute it and/or modify
   8  *  it under the terms of the GNU General Public License as published by
   9  *  the Free Software Foundation; either version 2 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  This program is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15  *  GNU General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU General Public License
  18  *  along with this program; if not, write to the Free Software
  19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  20  */
  21
  22 #ifndef PHRASE_INDEX_H
  23 #define PHRASE_INDEX_H
  24
  25 #include <stdio.h>
  26 #include <glib.h>
  27 #include "novel_types.h"
  28 #include "chewing_key.h"
  29 #include "pinyin_parser2.h"
  30 #include "pinyin_phrase2.h"
  31 #include "memory_chunk.h"
  32 #include "phrase_index_logger.h"
  33
/**
 * Phrase Index File Format
 *
 * Indirect Index: Index by Token
 * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 * + Phrase Offset + Phrase Offset + Phrase Offset + ......  +
 * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 * Phrase Content:
 * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 * + Phrase Length + number of  Pronunciations  + Uni-gram Frequency+
 * ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 * + Phrase String(UCS4) + n Pronunciations with Frequency +
 * +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 */
  48
  49 namespace pinyin{
  50
  51 /* Store delta info by phrase index logger in user home directory.
  52  */
  53
  54 const size_t phrase_item_header = sizeof(guint8) + sizeof(guint8) + sizeof(guint32);
  55
  56 /**
  57  * PhraseItem:
  58  *
  59  * The PhraseItem to access the items in phrase index.
  60  *
  61  */
  62 class PhraseItem{
  63     friend class SubPhraseIndex;
  64 private:
  65     MemoryChunk m_chunk;
  66     bool set_n_pronunciation(guint8 n_prouns);
  67 public:
  68     /**
  69      * PhraseItem::PhraseItem:
  70      *
  71      * The constructor of the PhraseItem.
  72      *
  73      */
  74     PhraseItem(){
  75         m_chunk.set_size(phrase_item_header);
  76         memset(m_chunk.begin(), 0, m_chunk.size());
  77     }
  78
  79 #if 0
  80     PhraseItem(MemoryChunk & chunk){
  81         m_chunk.set_content(0, chunk->begin(), chunk->size());
  82         assert ( m_chunk.size() >= phrase_item_header);
  83     }
  84 #endif
  85
  86     /**
  87      * PhraseItem::get_phrase_length:
  88      * @returns: the length of this phrase item.
  89      *
  90      * Get the length of this phrase item.
  91      *
  92      */
  93     guint8 get_phrase_length(){
  94         char * buf_begin = (char *)m_chunk.begin();
  95         return (*(guint8 *)buf_begin);
  96     }
  97
  98     /**
  99      * PhraseItem::get_n_pronunciation:
 100      * @returns: the number of the pronunciations.
 101      *
 102      * Get the number of the pronunciations.
 103      *
 104      */
 105     guint8 get_n_pronunciation(){
 106         char * buf_begin = ( char *) m_chunk.begin();
 107         return (*(guint8 *)(buf_begin + sizeof(guint8)));
 108     }
 109
 110     /**
 111      * PhraseItem::get_unigram_frequency:
 112      * @returns: the uni-gram frequency of this phrase item.
 113      *
 114      * Get the uni-gram frequency of this phrase item.
 115      *
 116      */
 117     guint32 get_unigram_frequency(){
 118         char * buf_begin = (char *)m_chunk.begin();
 119         return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8)));
 120     }
 121
 122     /**
 123      * PhraseItem::get_pronunciation_possibility:
 124      * @options: the pinyin options.
 125      * @keys: the pronunciation keys.
 126      * @returns: the possibility of this phrase item pronounces the pinyin.
 127      *
 128      * Get the possibility of this phrase item pronounces the pinyin.
 129      *
 130      */
 131     gfloat get_pronunciation_possibility(pinyin_option_t options,
 132                                          ChewingKey * keys){
 133         guint8 phrase_length = get_phrase_length();
 134         guint8 npron = get_n_pronunciation();
 135         size_t offset = phrase_item_header + phrase_length * sizeof (ucs4_t);
 136         char * buf_begin = (char *)m_chunk.begin();
 137         guint32 matched = 0, total_freq =0;
 138         for ( int i = 0 ; i < npron ; ++i){
 139             char * chewing_begin = buf_begin + offset +
 140                 i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
 141             guint32 * freq = (guint32 *)(chewing_begin +
 142                                          phrase_length * sizeof(ChewingKey));
 143             total_freq += *freq;
 144             if ( 0 == pinyin_compare_with_ambiguities2
 145                  (options,  keys,
 146                   (ChewingKey *)chewing_begin,phrase_length) ){
 147                 matched += *freq;
 148             }
 149         }
 150         // use preprocessor to avoid zero freq, in gen_pinyin_table.
 151         /*
 152         if ( 0 == total_freq )
 153             return 0.1;
 154         */
 155         gfloat retval = matched / (gfloat) total_freq;
 156         /*
 157         if ( 0 == retval )
 158             return 0.03;
 159         */
 160         return retval;
 161     }
 162
 163     /**
 164      * PhraseItem::increase_pronunciation_possibility:
 165      * @options: the pinyin options.
 166      * @keys: the pronunciation keys.
 167      * @delta: the delta to be added to the pronunciation keys.
 168      *
 169      * Add the delta to the pronunciation of the pronunciation keys.
 170      *
 171      */
 172     void increase_pronunciation_possibility(pinyin_option_t options,
 173                                      ChewingKey * keys,
 174                                      gint32 delta);
 175
 176     /**
 177      * PhraseItem::get_phrase_string:
 178      * @phrase: the ucs4 character buffer.
 179      * @returns: whether the get operation is successful.
 180      *
 181      * Get the ucs4 characters of this phrase item.
 182      *
 183      */
 184     bool get_phrase_string(ucs4_t * phrase);
 185
 186     /**
 187      * PhraseItem::set_phrase_string:
 188      * @phrase_length: the ucs4 character length of this phrase item.
 189      * @phrase: the ucs4 character buffer.
 190      * @returns: whether the set operation is successful.
 191      *
 192      * Set the length and ucs4 characters of this phrase item.
 193      *
 194      */
 195     bool set_phrase_string(guint8 phrase_length, ucs4_t * phrase);
 196
 197     /**
 198      * PhraseItem::get_nth_pronunciation:
 199      * @index: the pronunciation index.
 200      * @keys: the pronunciation keys.
 201      * @freq: the frequency of the pronunciation.
 202      * @returns: whether the get operation is successful.
 203      *
 204      * Get the nth pronunciation of this phrase item.
 205      *
 206      */
 207     bool get_nth_pronunciation(size_t index,
 208                                /* out */ ChewingKey * keys,
 209                                /* out */ guint32 & freq);
 210
 211     /**
 212      * PhraseItem::append_pronunciation:
 213      * @keys: the pronunciation keys.
 214      * @freq: the frequency of the pronunciation.
 215      *
 216      * Append one pronunciation.
 217      *
 218      */
 219     void append_pronunciation(ChewingKey * keys, guint32 freq);
 220
 221     /**
 222      * PhraseItem::remove_nth_pronunciation:
 223      * @index: the pronunciation index.
 224      *
 225      * Remove the nth pronunciation.
 226      *
 227      * Note: Normally don't change the first pronunciation,
 228      * which decides the token number.
 229      *
 230      */
 231     void remove_nth_pronunciation(size_t index);
 232
 233     bool operator == (const PhraseItem & rhs) const{
 234         if (m_chunk.size() != rhs.m_chunk.size())
 235             return false;
 236         return memcmp(m_chunk.begin(), rhs.m_chunk.begin(),
 237                       m_chunk.size()) == 0;
 238     }
 239
 240     bool operator != (const PhraseItem & rhs) const{
 241         return ! (*this == rhs);
 242     }
 243 };
 244
 245 /*
 246  *  In Sub Phrase Index, token == (token & PHRASE_MASK).
 247  */
 248
 249 /**
 250  * SubPhraseIndex:
 251  *
 252  * The SubPhraseIndex class for internal usage.
 253  *
 254  */
 255 class SubPhraseIndex{
 256 private:
 257     guint32 m_total_freq;
 258     MemoryChunk m_phrase_index;
 259     MemoryChunk m_phrase_content;
 260     MemoryChunk * m_chunk;
 261
 262     void reset(){
 263         m_total_freq = 0;
 264         m_phrase_index.set_size(0);
 265         m_phrase_content.set_size(0);
 266         if ( m_chunk ){
 267             delete m_chunk;
 268             m_chunk = NULL;
 269         }
 270     }
 271
 272 public:
 273     /**
 274      * SubPhraseIndex::SubPhraseIndex:
 275      *
 276      * The constructor of the SubPhraseIndex.
 277      *
 278      */
 279     SubPhraseIndex():m_total_freq(0){
 280         m_chunk = NULL;
 281     }
 282
 283     /**
 284      * SubPhraseIndex::~SubPhraseIndex:
 285      *
 286      * The destructor of the SubPhraseIndex.
 287      *
 288      */
 289     ~SubPhraseIndex(){
 290         reset();
 291     }
 292
 293     /**
 294      * SubPhraseIndex::load:
 295      * @chunk: the memory chunk of the binary sub phrase index.
 296      * @offset: the begin of binary data in the memory chunk.
 297      * @end: the end of binary data in the memory chunk.
 298      * @returns: whether the load operation is successful.
 299      *
 300      * Load the sub phrase index from the memory chunk.
 301      *
 302      */
 303     bool load(MemoryChunk * chunk,
 304               table_offset_t offset, table_offset_t end);
 305
 306     /**
 307      * SubPhraseIndex::store:
 308      * @new_chunk: the new memory chunk to store this sub phrase index.
 309      * @offset: the begin of binary data in the memory chunk.
 310      * @end: the end of stored binary data in the memory chunk.
 311      * @returns: whether the store operation is successful.
 312      *
 313      * Store the sub phrase index to the new memory chunk.
 314      *
 315      */
 316     bool store(MemoryChunk * new_chunk,
 317                table_offset_t offset, table_offset_t & end);
 318
 319     /**
 320      * SubPhraseIndex::diff:
 321      * @oldone: the original content of sub phrase index.
 322      * @logger: the delta information of user self-learning data.
 323      * @returns: whether the diff operation is successful.
 324      *
 325      * Compare this sub phrase index with the original content of the system
 326      * sub phrase index to generate the logger of difference.
 327      *
 328      * Note: Switch to logger format to reduce user space storage.
 329      *
 330      */
 331     bool diff(SubPhraseIndex * oldone, PhraseIndexLogger * logger);
 332
 333     /**
 334      * SubPhraseIndex::merge:
 335      * @logger: the logger of difference in user home directory.
 336      * @returns: whether the merge operation is successful.
 337      *
 338      * Merge the user logger of difference with this sub phrase index.
 339      *
 340      */
 341     bool merge(PhraseIndexLogger * logger);
 342
 343     /**
 344      * SubPhraseIndex::get_range:
 345      * @range: the token range.
 346      * @returns: whether the get operation is successful.
 347      *
 348      * Get the token range in this sub phrase index.
 349      *
 350      */
 351     int get_range(/* out */ PhraseIndexRange & range);
 352
 353     /**
 354      * SubPhraseIndex::get_phrase_index_total_freq:
 355      * @returns: the total frequency of this sub phrase index.
 356      *
 357      * Get the total frequency of this sub phrase index.
 358      *
 359      * Note: maybe call it "Zero-gram".
 360      *
 361      */
 362     guint32 get_phrase_index_total_freq();
 363
 364     /**
 365      * SubPhraseIndex::add_unigram_frequency:
 366      * @token: the phrase token.
 367      * @delta: the delta value of the phrase token.
 368      * @returns: the status of the add operation.
 369      *
 370      * Add delta value to the phrase of the token.
 371      *
 372      * Note: this method is a fast path to add delta value.
 373      * Maybe use the get_phrase_item method instead in future.
 374      *
 375      */
 376     int add_unigram_frequency(phrase_token_t token, guint32 delta);
 377
 378     /**
 379      * SubPhraseIndex::get_phrase_item:
 380      * @token: the phrase token.
 381      * @item: the phrase item of the token.
 382      * @returns: the status of the get operation.
 383      *
 384      * Get the phrase item from this sub phrase index.
 385      *
 386      * Note:get_phrase_item function can't modify the phrase item size,
 387      * but can increment the freq of the special pronunciation,
 388      * or change the content without size increasing.
 389      *
 390      */
 391     int get_phrase_item(phrase_token_t token, PhraseItem & item);
 392
 393     /**
 394      * SubPhraseIndex::add_phrase_item:
 395      * @token: the phrase token.
 396      * @item: the phrase item of the token.
 397      * @returns: the status of the add operation.
 398      *
 399      * Add the phrase item to this sub phrase index.
 400      *
 401      */
 402     int add_phrase_item(phrase_token_t token, PhraseItem * item);
 403
 404     /**
 405      * SubPhraseIndex::remove_phrase_item:
 406      * @token: the phrase token.
 407      * @item: the removed phrase item of the token.
 408      * @returns: the status of the remove operation.
 409      *
 410      * Remove the phrase item of the token.
 411      *
 412      * Note: this remove_phrase_item method will substract the unigram
 413      * frequency of the removed item from m_total_freq.
 414      *
 415      */
 416     int remove_phrase_item(phrase_token_t token, /* out */ PhraseItem * & item);
 417
 418 };
 419
 420 /**
 421  * FacadePhraseIndex:
 422  *
 423  * The facade class of phrase index.
 424  *
 425  */
 426 class FacadePhraseIndex{
 427 private:
 428     guint32 m_total_freq;
 429     SubPhraseIndex * m_sub_phrase_indices[PHRASE_INDEX_LIBRARY_COUNT];
 430 public:
 431     /**
 432      * FacadePhraseIndex::FacadePhraseIndex:
 433      *
 434      * The constructor of the FacadePhraseIndex.
 435      *
 436      */
 437     FacadePhraseIndex(){
 438         m_total_freq = 0;
 439         memset(m_sub_phrase_indices, 0, sizeof(m_sub_phrase_indices));
 440     }
 441
 442     /**
 443      * FacadePhraseIndex::~FacadePhraseIndex:
 444      *
 445      * The destructor of the FacadePhraseIndex.
 446      *
 447      */
 448     ~FacadePhraseIndex(){
 449         for ( size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i){
 450             if ( m_sub_phrase_indices[i] ){
 451                 delete m_sub_phrase_indices[i];
 452                 m_sub_phrase_indices[i] = NULL;
 453             }
 454         }
 455     }
 456
 457     /**
 458      * FacadePhraseIndex::load_text:
 459      * @phrase_index: the index of sub phrase index to be loaded.
 460      * @infile: the textual format file of the phrase table.
 461      * @returns: whether the load operation is successful.
 462      *
 463      * Load one sub phrase index from the textual format file.
 464      * Note: load sub phrase index according to the config in future.
 465      *
 466      */
 467     bool load_text(guint8 phrase_index, FILE * infile);
 468
 469     /**
 470      * FacadePhraseIndex::load:
 471      * @phrase_index: the index of sub phrase index to be loaded.
 472      * @chunk: the memory chunk of sub phrase index to be loaded.
 473      * @returns: whether the load operation is successful.
 474      *
 475      * Load one sub phrase index from the memory chunk.
 476      *
 477      */
 478     bool load(guint8 phrase_index, MemoryChunk * chunk);
 479
 480     /**
 481      * FacadePhraseIndex::store:
 482      * @phrase_index: the index of sub phrase index to be stored.
 483      * @new_chunk: the memory chunk of sub phrase index to be stored.
 484      * @returns: whether the store operation is successful.
 485      *
 486      * Store one sub phrase index to the memory chunk.
 487      *
 488      */
 489     bool store(guint8 phrase_index, MemoryChunk * new_chunk);
 490
 491     /**
 492      * FacadePhraseIndex::unload:
 493      * @phrase_index: the index of sub phrase index to be unloaded.
 494      * @returns: whether the unload operation is successful.
 495      *
 496      * Unload one sub phrase index.
 497      *
 498      */
 499     bool unload(guint8 phrase_index);
 500
 501
 502     /**
 503      * FacadePhraseIndex::diff:
 504      * @phrase_index: the index of sub phrase index to be differed.
 505      * @oldchunk: the original content of sub phrase index.
 506      * @newlog: the delta information of user self-learning data.
 507      * @returns: whether the diff operation is successful.
 508      *
 509      * Store user delta information in the logger format.
 510      *
 511      * Note: the ownership of oldchunk is transfered here.
 512      *
 513      */
 514     bool diff(guint8 phrase_index, MemoryChunk * oldchunk,
 515               MemoryChunk * newlog);
 516
 517     /**
 518      * FacadePhraseIndex::merge:
 519      * @phrase_index: the index of sub phrase index to be merged.
 520      * @log: the logger of difference in user home directory.
 521      * @returns: whether the merge operation is successful.
 522      *
 523      * Merge the user logger of difference with the sub phrase index.
 524      *
 525      * Note: the ownership of log is transfered here.
 526      *
 527      */
 528     bool merge(guint8 phrase_index, MemoryChunk * log);
 529
 530     /**
 531      * FacadePhraseIndex::compact:
 532      * @returns: whether the compact operation is successful.
 533      *
 534      * Compat all sub phrase index memory usage.
 535      *
 536      */
 537     bool compact();
 538
 539     /**
 540      * FacadePhraseIndex::get_sub_phrase_range:
 541      * @min_index: the minimal sub phrase index.
 542      * @max_index: the maximal sub phrase index.
 543      * @returns: the status of the get operation.
 544      *
 545      * Get the minimum and maximum of the sub phrase index.
 546      *
 547      */
 548     int get_sub_phrase_range(guint8 & min_index, guint8 & max_index);
 549
 550     /**
 551      * FacadePhraseIndex::get_range:
 552      * @phrase_index: the index of sub phrase index.
 553      * @range: the token range of the sub phrase index.
 554      * @returns: the status of the get operation.
 555      *
 556      * Get the token range of the sub phrase index.
 557      *
 558      */
 559     int get_range(guint8 phrase_index, /* out */ PhraseIndexRange & range);
 560
 561     /**
 562      * FacadePhraseIndex::get_phrase_index_total_freq:
 563      * @returns: the total freq of the facade phrase index.
 564      *
 565      * Get the total freq of the facade phrase index.
 566      *
 567      * Note: maybe call it "Zero-gram".
 568      *
 569      */
 570     guint32 get_phrase_index_total_freq(){
 571         return m_total_freq;
 572     }
 573
 574     /**
 575      * FacadePhraseIndex::add_unigram_frequency:
 576      * @token: the phrase token.
 577      * @delta: the delta value of the phrase token.
 578      * @returns: the status of the add operation.
 579      *
 580      * Add delta value to the phrase of the token.
 581      *
 582      */
 583     int add_unigram_frequency(phrase_token_t token, guint32 delta){
 584         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 585         SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
 586         if ( !sub_phrase )
 587             return ERROR_NO_SUB_PHRASE_INDEX;
 588         m_total_freq += delta;
 589         return sub_phrase->add_unigram_frequency(token, delta);
 590     }
 591
 592     /**
 593      * FacadePhraseIndex::get_phrase_item:
 594      * @token: the phrase token.
 595      * @item: the phrase item of the token.
 596      * @returns: the status of the get operation.
 597      *
 598      * Get the phrase item from the facade phrase index.
 599      *
 600      */
 601     int get_phrase_item(phrase_token_t token, PhraseItem & item){
 602         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 603         SubPhraseIndex * sub_phrase = m_sub_phrase_indices[index];
 604         if ( !sub_phrase )
 605             return ERROR_NO_SUB_PHRASE_INDEX;
 606         return sub_phrase->get_phrase_item(token, item);
 607     }
 608
 609     /**
 610      * FacadePhraseIndex::add_phrase_item:
 611      * @token: the phrase token.
 612      * @item: the phrase item of the token.
 613      * @returns: the status of the add operation.
 614      *
 615      * Add the phrase item to the facade phrase index.
 616      *
 617      */
 618     int add_phrase_item(phrase_token_t token, PhraseItem * item){
 619         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 620         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
 621         if ( !sub_phrase ){
 622             sub_phrase = new SubPhraseIndex;
 623         }
 624         m_total_freq += item->get_unigram_frequency();
 625         return sub_phrase->add_phrase_item(token, item);
 626     }
 627
 628     /**
 629      * FacadePhraseIndex::remove_phrase_item:
 630      * @token: the phrase token.
 631      * @item: the removed phrase item of the token.
 632      * @returns: the status of the remove operation.
 633      *
 634      * Remove the phrase item of the token.
 635      *
 636      */
 637     int remove_phrase_item(phrase_token_t token, PhraseItem * & item){
 638         guint8 index = PHRASE_INDEX_LIBRARY_INDEX(token);
 639         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
 640         if ( !sub_phrase ){
 641             return ERROR_NO_SUB_PHRASE_INDEX;
 642         }
 643         int result = sub_phrase->remove_phrase_item(token, item);
 644         if ( result )
 645             return result;
 646         m_total_freq -= item->get_unigram_frequency();
 647         return result;
 648     }
 649
 650     /**
 651      * FacadePhraseIndex::prepare_ranges:
 652      * @ranges: the ranges to be prepared.
 653      * @returns: whether the prepare operation is successful.
 654      *
 655      * Prepare the ranges.
 656      *
 657      */
 658     bool prepare_ranges(PhraseIndexRanges ranges) {
 659         /* assume memset(ranges, 0, sizeof(ranges)); */
 660         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 661             GArray * & range = ranges[i];
 662             assert(NULL == range);
 663
 664             SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i];
 665             if (sub_phrase) {
 666                 range = g_array_new(FALSE, FALSE, sizeof(PhraseIndexRange));
 667             }
 668         }
 669         return true;
 670     }
 671
 672     /**
 673      * FacadePhraseIndex::destroy_ranges:
 674      * @ranges: the ranges to be destroyed.
 675      * @returns: whether the destroy operation is successful.
 676      *
 677      * Destroy the ranges.
 678      *
 679      */
 680     bool destroy_ranges(PhraseIndexRanges ranges) {
 681         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 682             GArray * & range = ranges[i];
 683             if (range) {
 684                 g_array_free(range, TRUE);
 685                 range = NULL;
 686             }
 687         }
 688         return true;
 689     }
 690
 691     /**
 692      * FacadePhraseIndex::prepare_tokens:
 693      * @tokens: the tokens to be prepared.
 694      * @returns: whether the prepare operation is successful.
 695      *
 696      * Prepare the tokens.
 697      *
 698      */
 699     bool prepare_tokens(PhraseTokens tokens) {
 700         /* assume memset(tokens, 0, sizeof(tokens)); */
 701         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 702             GArray * & token = tokens[i];
 703             assert(NULL == token);
 704
 705             SubPhraseIndex * sub_phrase = m_sub_phrase_indices[i];
 706             if (sub_phrase) {
 707                 token = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
 708             }
 709         }
 710         return true;
 711     }
 712
 713     /**
 714      * FacadePhraseIndex::destroy_tokens:
 715      * @tokens: the tokens to be destroyed.
 716      * @returns: whether the destroy operation is successful.
 717      *
 718      * Destroy the tokens.
 719      *
 720      */
 721     bool destroy_tokens(PhraseTokens tokens) {
 722         for (size_t i = 0; i < PHRASE_INDEX_LIBRARY_COUNT; ++i) {
 723             GArray * & token = tokens[i];
 724             if (token) {
 725                 g_array_free(token, TRUE);
 726                 token = NULL;
 727             }
 728         }
 729         return true;
 730     }
 731
 732     /**
 733      * FacadePhraseIndex::create_sub_phrase:
 734      * @index: the phrase index to be created.
 735      * @returns: the result of the create operation.
 736      *
 737      * Create the sub phrase index.
 738      *
 739      */
 740     int create_sub_phrase(guint8 index) {
 741         SubPhraseIndex * & sub_phrase = m_sub_phrase_indices[index];
 742         if (sub_phrase) {
 743             return ERROR_ALREADY_EXISTS;
 744         }
 745
 746         sub_phrase = new SubPhraseIndex;
 747
 748         return ERROR_OK;
 749     }
 750 };
 751
 752 typedef enum {
 753     NOT_USED,                /* not used. */
 754     SYSTEM_FILE,             /* system phrase file. */
 755     USER_FILE,               /* user only phrase file. */
 756 } PHRASE_FILE_TYPE;
 757
 758 typedef struct {
 759     const char * m_table_filename;
 760     const char * m_system_filename;
 761     const char * m_user_filename;
 762     PHRASE_FILE_TYPE m_file_type;
 763 } pinyin_table_info_t;
 764
 765 extern const pinyin_table_info_t pinyin_phrase_files[PHRASE_INDEX_LIBRARY_COUNT];
 766
 767 };
 768
 769 #endif