8be5c0cd2cfea10cbf4c4fd735196e7e91f04a57
[platform/upstream/libpinyin.git] / src / lookup / pinyin_lookup2.cpp
1 /* 
2  *  libpinyin
3  *  Library to deal with pinyin.
4  *  
5  *  Copyright (C) 2012 Peng Wu <alexepico@gmail.com>
6  *  
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  * 
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  *  GNU General Public License for more details.
16  *  
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, write to the Free Software
19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20  */
21
22 #include <math.h>
23 #include "facade_chewing_table.h"
24 #include "pinyin_lookup2.h"
25 #include "stl_lite.h"
26
27 using namespace pinyin;
28
29 /*
30 const gfloat PinyinLookup2::bigram_lambda = lambda;
31 const gfloat PinyinLookup2::unigram_lambda = 1 - lambda;
32 */
33
34 /* internal definition */
35 static const size_t nbeam = 32;
36
37 static bool dump_max_value(GPtrArray * values){
38     if (0 == values->len)
39         return false;
40
41     const lookup_value_t * max =
42         (const lookup_value_t *) g_ptr_array_index(values, 0);
43
44     for (size_t i = 1; i < values->len; ++i) {
45         const lookup_value_t * cur =
46             (const lookup_value_t *) g_ptr_array_index(values, i);
47
48         if (cur->m_poss > max->m_poss)
49             max = cur;
50     }
51
52     printf("max value: %f\n", max->m_poss);
53
54     return true;
55 }
56
57 static bool dump_all_values(GPtrArray * values) {
58     if (0 == values->len)
59         return false;
60
61     printf("values:");
62     for (size_t i = 0; i < values->len; ++i) {
63         const lookup_value_t * cur =
64             (const lookup_value_t *) g_ptr_array_index(values, i);
65
66         printf("%f\t", cur->m_poss);
67     }
68     printf("\n");
69
70     return true;
71 }
72
73 /* populate the candidates. */
74 static bool populate_candidates(/* out */ GPtrArray * candidates,
75                                 /* in */ LookupStepContent step) {
76     g_ptr_array_set_size(candidates, 0);
77
78     if (0 == step->len)
79         return false;
80
81     for (size_t i = 0; i < step->len; ++i) {
82         lookup_value_t * value = &g_array_index
83             (step, lookup_value_t, i);
84
85         g_ptr_array_add(candidates, value);
86     }
87
88     /* dump_max_value(candidates); */
89
90     return true;
91 }
92
93 static bool lookup_value_less_than(lookup_value_t * lhs, lookup_value_t * rhs){
94     return lhs->m_poss < rhs->m_poss;
95 }
96
97 /* use maximum heap to get the topest results. */
98 static bool get_top_results(/* out */ GPtrArray * topresults,
99                             /* in */ GPtrArray * candidates) {
100     g_ptr_array_set_size(topresults, 0);
101
102     if (0 == candidates->len)
103         return false;
104
105     lookup_value_t ** begin =
106         (lookup_value_t **) &g_ptr_array_index(candidates, 0);
107     lookup_value_t ** end =
108         (lookup_value_t **) &g_ptr_array_index(candidates, candidates->len);
109
110     std_lite::make_heap(begin, end, lookup_value_less_than);
111
112     while (end != begin) {
113         lookup_value_t * one = *begin;
114         g_ptr_array_add(topresults, one);
115
116         std_lite::pop_heap(begin, end, lookup_value_less_than);
117         --end;
118
119         if (topresults->len >= nbeam)
120             break;
121     }
122
123     /* dump_all_values(topresults); */
124
125     return true;
126 }
127
128 static bool populate_prefixes(GPtrArray * steps_index,
129                               GPtrArray * steps_content,
130                               TokenVector prefixes) {
131     assert(prefixes->len > 0);
132
133     for (size_t i = 0; i < prefixes->len; ++i) {
134         phrase_token_t token = g_array_index(prefixes, phrase_token_t, i);
135         lookup_key_t initial_key = token;
136         lookup_value_t initial_value(log(1));
137         initial_value.m_handles[1] = token;
138
139         LookupStepContent initial_step_content = (LookupStepContent)
140             g_ptr_array_index(steps_content, 0);
141         initial_step_content = g_array_append_val
142             (initial_step_content, initial_value);
143
144         LookupStepIndex initial_step_index = (LookupStepIndex)
145             g_ptr_array_index(steps_index, 0);
146         g_hash_table_insert(initial_step_index,
147                             GUINT_TO_POINTER(initial_key),
148                             GUINT_TO_POINTER(initial_step_content->len - 1));
149     }
150
151     return true;
152 }
153
154 static bool init_steps(GPtrArray * steps_index,
155                        GPtrArray * steps_content,
156                        int nstep){
157     /* add null start step */
158     g_ptr_array_set_size(steps_index, nstep);
159     g_ptr_array_set_size(steps_content, nstep);
160
161     for (int i = 0; i < nstep; ++i) {
162         /* initialize steps_index */
163         g_ptr_array_index(steps_index, i) = g_hash_table_new(g_direct_hash, g_direct_equal);
164         /* initialize steps_content */
165         g_ptr_array_index(steps_content, i) = g_array_new(FALSE, FALSE, sizeof(lookup_value_t));
166     }
167
168     return true;
169 }
170
171 static void clear_steps(GPtrArray * steps_index, GPtrArray * steps_content){
172     /* clear steps_index */
173     for ( size_t i = 0; i < steps_index->len; ++i){
174         GHashTable * table = (GHashTable *) g_ptr_array_index(steps_index, i);
175         g_hash_table_destroy(table);
176         g_ptr_array_index(steps_index, i) = NULL;
177     }
178
179     /* clear steps_content */
180     for ( size_t i = 0; i < steps_content->len; ++i){
181         GArray * array = (GArray *) g_ptr_array_index(steps_content, i);
182         g_array_free(array, TRUE);
183         g_ptr_array_index(steps_content, i) = NULL;
184     }
185 }
186
187
188 PinyinLookup2::PinyinLookup2(const gfloat lambda,
189                              pinyin_option_t options,
190                              FacadeChewingTable * pinyin_table,
191                              FacadePhraseIndex * phrase_index,
192                              Bigram * system_bigram,
193                              Bigram * user_bigram)
194     : bigram_lambda(lambda),
195       unigram_lambda(1. - lambda)
196 {
197     m_options = options;
198     m_pinyin_table = pinyin_table;
199     m_phrase_index = phrase_index;
200     m_system_bigram = system_bigram;
201     m_user_bigram = user_bigram;
202
203     m_steps_index = g_ptr_array_new();
204     m_steps_content = g_ptr_array_new();
205 }
206
207 PinyinLookup2::~PinyinLookup2(){
208     clear_steps(m_steps_index, m_steps_content);
209     g_ptr_array_free(m_steps_index, TRUE);
210     g_ptr_array_free(m_steps_content, TRUE);
211 }
212
213
214 bool PinyinLookup2::get_best_match(TokenVector prefixes,
215                                    ChewingKeyVector keys,
216                                    CandidateConstraints constraints,
217                                    MatchResults & results){
218     m_constraints = constraints;
219     m_keys = keys;
220     int nstep = keys->len + 1;
221
222     clear_steps(m_steps_index, m_steps_content);
223
224     init_steps(m_steps_index, m_steps_content, nstep);
225
226     populate_prefixes(m_steps_index, m_steps_content, prefixes);
227
228     PhraseIndexRanges ranges;
229     memset(ranges, 0, sizeof(PhraseIndexRanges));
230     m_phrase_index->prepare_ranges(ranges);
231
232     GPtrArray * candidates = g_ptr_array_new();
233     GPtrArray * topresults = g_ptr_array_new();
234
235     /* begin the viterbi beam search. */
236     for ( int i = 0; i < nstep - 1; ++i ){
237         lookup_constraint_t * cur_constraint = &g_array_index
238             (m_constraints, lookup_constraint_t, i);
239
240         if (CONSTRAINT_NOSEARCH == cur_constraint->m_type)
241             continue;
242
243         LookupStepContent step = (LookupStepContent)
244             g_ptr_array_index(m_steps_content, i);
245
246         populate_candidates(candidates, step);
247         get_top_results(topresults, candidates);
248
249         if (0 == topresults->len)
250             continue;
251
252         for ( int m = i + 1; m < nstep; ++m ){
253             const int len = m - i;
254             if (len > MAX_PHRASE_LENGTH)
255                 break;
256
257             lookup_constraint_t * next_constraint = &g_array_index
258                 (m_constraints, lookup_constraint_t, m - 1);
259
260             if (CONSTRAINT_NOSEARCH == next_constraint->m_type)
261                 break;
262
263             ChewingKey * pinyin_keys = (ChewingKey *)m_keys->data;
264             /* do one pinyin table search. */
265             int result = m_pinyin_table->search(len, pinyin_keys + i, ranges);
266
267             if (result & SEARCH_OK) {
268                 /* assume topresults always contains items. */
269                 search_bigram2(topresults, i, ranges),
270                     search_unigram2(topresults, i, ranges);
271             }
272
273             /* poke the next constraint. */
274             ++ next_constraint;
275             if (CONSTRAINT_ONESTEP == next_constraint->m_type)
276                 break;
277
278             /* no longer pinyin */
279             if (!(result & SEARCH_CONTINUED))
280                 break;
281         }
282     }
283
284     m_phrase_index->destroy_ranges(ranges);
285
286     g_ptr_array_free(candidates, TRUE);
287     g_ptr_array_free(topresults, TRUE);
288
289     return final_step(results);
290 }
291
292 bool PinyinLookup2::search_unigram2(GPtrArray * topresults, int nstep,
293                                     PhraseIndexRanges ranges) {
294
295     if (0 == topresults->len)
296         return false;
297
298     lookup_value_t * max = (lookup_value_t *)
299         g_ptr_array_index(topresults, 0);
300
301     lookup_constraint_t * constraint =
302         &g_array_index(m_constraints, lookup_constraint_t, nstep);
303
304     if (CONSTRAINT_ONESTEP == constraint->m_type) {
305         return unigram_gen_next_step(nstep, max, constraint->m_token);
306     }
307
308     bool found = false;
309
310     if (NO_CONSTRAINT == constraint->m_type) {
311         for ( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){
312             GArray * array = ranges[m];
313             if ( !array ) continue;
314
315             for ( size_t n = 0; n < array->len; ++n){
316                 PhraseIndexRange * range = &g_array_index(array, PhraseIndexRange, n);
317                 for ( phrase_token_t token = range->m_range_begin;
318                       token != range->m_range_end; ++token){
319                     found = unigram_gen_next_step(nstep, max, token)|| found;
320                 }
321             }
322         }
323     }
324
325     return found;
326 }
327
328 bool PinyinLookup2::search_bigram2(GPtrArray * topresults, int nstep,
329                                    PhraseIndexRanges ranges) {
330
331     lookup_constraint_t * constraint =
332         &g_array_index(m_constraints, lookup_constraint_t, nstep);
333
334     bool found = false;
335     BigramPhraseArray bigram_phrase_items = g_array_new
336         (FALSE, FALSE, sizeof(BigramPhraseItem));
337
338     for (size_t i = 0; i < topresults->len; ++i) {
339         lookup_value_t * value = (lookup_value_t *)
340             g_ptr_array_index(topresults, i);
341
342         phrase_token_t index_token = value->m_handles[1];
343
344         SingleGram * system = NULL, * user = NULL;
345         m_system_bigram->load(index_token, system);
346         m_user_bigram->load(index_token, user);
347
348         if ( !merge_single_gram(&m_merged_single_gram, system, user) )
349             continue;
350
351         if ( CONSTRAINT_ONESTEP == constraint->m_type ){
352             phrase_token_t token = constraint->m_token;
353
354             guint32 freq;
355             if( m_merged_single_gram.get_freq(token, freq) ){
356                 guint32 total_freq;
357                 m_merged_single_gram.get_total_freq(total_freq);
358                 gfloat bigram_poss = freq / (gfloat) total_freq;
359                 found = bigram_gen_next_step(nstep, value, token, bigram_poss) || found;
360             }
361         }
362
363         if (NO_CONSTRAINT == constraint->m_type) {
364             for( size_t m = 0; m < PHRASE_INDEX_LIBRARY_COUNT; ++m){
365                 GArray * array = ranges[m];
366                 if ( !array ) continue;
367
368                 for ( size_t n = 0; n < array->len; ++n){
369                     PhraseIndexRange * range =
370                         &g_array_index(array, PhraseIndexRange, n);
371
372                     g_array_set_size(bigram_phrase_items, 0);
373                     m_merged_single_gram.search(range, bigram_phrase_items);
374                     for( size_t k = 0; k < bigram_phrase_items->len; ++k) {
375                         BigramPhraseItem * item = &g_array_index(bigram_phrase_items, BigramPhraseItem, k);
376                         found = bigram_gen_next_step(nstep, value, item->m_token, item->m_freq) || found;
377                     }
378                 }
379             }
380         }
381         if (system)
382             delete system;
383         if (user)
384             delete user;
385     }
386
387     g_array_free(bigram_phrase_items, TRUE);
388     return found;
389 }
390
391
392 bool PinyinLookup2::unigram_gen_next_step(int nstep,
393                                           lookup_value_t * cur_step,
394                                           phrase_token_t token) {
395
396     if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
397         return false;
398
399     size_t phrase_length = m_cache_phrase_item.get_phrase_length();
400     gdouble elem_poss = m_cache_phrase_item.get_unigram_frequency() / (gdouble)
401         m_phrase_index->get_phrase_index_total_freq();
402     if ( elem_poss < DBL_EPSILON )
403         return false;
404
405     ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep;
406     gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys);
407     if (pinyin_poss < FLT_EPSILON )
408         return false;
409
410     lookup_value_t next_step;
411     next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token;
412     next_step.m_poss = cur_step->m_poss + log(elem_poss * pinyin_poss * unigram_lambda);
413     next_step.m_last_step = nstep;
414
415     return save_next_step(nstep + phrase_length, cur_step, &next_step);
416 }
417
418 bool PinyinLookup2::bigram_gen_next_step(int nstep,
419                                          lookup_value_t * cur_step,
420                                          phrase_token_t token,
421                                          gfloat bigram_poss) {
422
423     if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
424         return false;
425
426     size_t phrase_length = m_cache_phrase_item.get_phrase_length();
427     gdouble unigram_poss = m_cache_phrase_item.get_unigram_frequency() /
428         (gdouble) m_phrase_index->get_phrase_index_total_freq();
429     if ( bigram_poss < FLT_EPSILON && unigram_poss < DBL_EPSILON )
430         return false;
431
432     ChewingKey * pinyin_keys = ((ChewingKey *)m_keys->data) + nstep;
433     gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys);
434     if ( pinyin_poss < FLT_EPSILON )
435         return false;
436
437     lookup_value_t next_step;
438     next_step.m_handles[0] = cur_step->m_handles[1]; next_step.m_handles[1] = token;
439     next_step.m_poss = cur_step->m_poss +
440         log((bigram_lambda * bigram_poss + unigram_lambda * unigram_poss) * pinyin_poss);
441     next_step.m_last_step = nstep;
442
443     return save_next_step(nstep + phrase_length, cur_step, &next_step);
444 }
445
446 bool PinyinLookup2::save_next_step(int next_step_pos,
447                                    lookup_value_t * cur_step,
448                                    lookup_value_t * next_step){
449
450     lookup_key_t next_key = next_step->m_handles[1];
451     LookupStepIndex next_lookup_index = (LookupStepIndex)
452         g_ptr_array_index(m_steps_index, next_step_pos);
453     LookupStepContent next_lookup_content = (LookupStepContent)
454         g_ptr_array_index(m_steps_content, next_step_pos);
455
456     gpointer key = NULL, value = NULL;
457     gboolean lookup_result = g_hash_table_lookup_extended
458         (next_lookup_index, GUINT_TO_POINTER(next_key), &key, &value);
459
460     if ( !lookup_result ){
461         g_array_append_val(next_lookup_content, *next_step);
462         g_hash_table_insert(next_lookup_index, GUINT_TO_POINTER(next_key), GUINT_TO_POINTER(next_lookup_content->len - 1));
463         return true;
464     }else{
465         size_t step_index = GPOINTER_TO_UINT(value);
466         lookup_value_t * orig_next_value = &g_array_index
467             (next_lookup_content, lookup_value_t, step_index);
468
469         if ( orig_next_value->m_poss < next_step->m_poss) {
470             /* found better result. */
471             orig_next_value->m_handles[0] = next_step->m_handles[0];
472             assert(orig_next_value->m_handles[1] == next_step->m_handles[1]);
473             orig_next_value->m_poss = next_step->m_poss;
474             orig_next_value->m_last_step = next_step->m_last_step;
475             return true;
476         }
477
478         return false;
479     }
480 }
481
482 bool PinyinLookup2::final_step(MatchResults & results){
483
484     /* reset results */
485     g_array_set_size(results, m_steps_content->len - 1);
486     for (size_t i = 0; i < results->len; ++i){
487         phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
488         *token = null_token;
489     }
490
491     /* find max element */
492     size_t last_step_pos = m_steps_content->len - 1;
493     GArray * last_step_array = (GArray *)g_ptr_array_index(m_steps_content, last_step_pos);
494     if ( last_step_array->len == 0 )
495         return false;
496
497     lookup_value_t * max_value = &g_array_index(last_step_array, lookup_value_t, 0);
498     for ( size_t i = 1; i < last_step_array->len; ++i){
499         lookup_value_t * cur_value = &g_array_index(last_step_array, lookup_value_t, i);
500         if ( cur_value->m_poss > max_value->m_poss )
501             max_value = cur_value;
502     }
503
504     /* backtracing */
505     while( true ){
506         int cur_step_pos = max_value->m_last_step;
507         if ( -1 == cur_step_pos )
508             break;
509
510         phrase_token_t * token = &g_array_index
511             (results, phrase_token_t, cur_step_pos);
512         *token = max_value->m_handles[1];
513
514         phrase_token_t last_token = max_value->m_handles[0];
515         LookupStepIndex lookup_step_index = (LookupStepIndex)
516             g_ptr_array_index(m_steps_index, cur_step_pos);
517
518         gpointer key = NULL, value = NULL;
519         gboolean result = g_hash_table_lookup_extended
520             (lookup_step_index, GUINT_TO_POINTER(last_token), &key, &value);
521         if (!result)
522             return false;
523
524         LookupStepContent lookup_step_content = (LookupStepContent)
525             g_ptr_array_index(m_steps_content, cur_step_pos);
526         max_value = &g_array_index
527             (lookup_step_content, lookup_value_t, GPOINTER_TO_UINT(value));
528     }
529
530     /* no need to reverse the result */
531     return true;
532 }
533
534
535 bool PinyinLookup2::train_result2(ChewingKeyVector keys,
536                                   CandidateConstraints constraints,
537                                   MatchResults results) {
538     const guint32 initial_seed = 23 * 3;
539     const guint32 expand_factor = 2;
540     const guint32 unigram_factor = 7;
541     const guint32 pinyin_factor = 1;
542     const guint32 ceiling_seed = 23 * 15 * 64;
543
544     /* begin training based on constraints and results. */
545     bool train_next = false;
546     ChewingKey * pinyin_keys = (ChewingKey *) keys->data;
547
548     phrase_token_t last_token = sentence_start;
549     /* constraints->len + 1 == results->len */
550     for (size_t i = 0; i < constraints->len; ++i) {
551         phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
552         if (null_token == *token)
553             continue;
554
555         lookup_constraint_t * constraint = &g_array_index
556             (constraints, lookup_constraint_t, i);
557         if (train_next || CONSTRAINT_ONESTEP == constraint->m_type) {
558             if (CONSTRAINT_ONESTEP == constraint->m_type) {
559                 assert(*token == constraint->m_token);
560                 train_next = true;
561             } else {
562                 train_next = false;
563             }
564
565             guint32 seed = initial_seed;
566             /* train bi-gram first, and get train seed. */
567             if (last_token) {
568                 SingleGram * user = NULL;
569                 m_user_bigram->load(last_token, user);
570
571                 guint32 total_freq = 0;
572                 if (!user) {
573                     user = new SingleGram;
574                 }
575                 assert(user->get_total_freq(total_freq));
576
577                 guint32 freq = 0;
578                 /* compute train factor */
579                 if (!user->get_freq(*token, freq)) {
580                     assert(user->insert_freq(*token, 0));
581                     seed = initial_seed;
582                 } else {
583                     seed = std_lite::max(freq, initial_seed);
584                     seed *= expand_factor;
585                     seed = std_lite::min(seed, ceiling_seed);
586                 }
587
588                 /* protect against total_freq overflow */
589                 if (seed > 0 && total_freq > total_freq + seed)
590                     goto next;
591
592                 assert(user->set_total_freq(total_freq + seed));
593                 /* if total_freq is not overflow, then freq won't overflow. */
594                 assert(user->set_freq(*token, freq + seed));
595                 assert(m_user_bigram->store(last_token, user));
596             next:
597                 if (user)
598                     delete user;
599             }
600
601             /* train uni-gram */
602             m_phrase_index->get_phrase_item(*token, m_cache_phrase_item);
603             m_cache_phrase_item.increase_pronunciation_possibility
604                 (m_options, pinyin_keys + i, seed * pinyin_factor);
605             m_phrase_index->add_unigram_frequency
606                 (*token, seed * unigram_factor);
607         }
608         last_token = *token;
609     }
610     return true;
611 }
612
613
614 int PinyinLookup2::add_constraint(CandidateConstraints constraints,
615                                   size_t index,
616                                   phrase_token_t token) {
617
618     if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
619         return 0;
620
621     size_t phrase_length = m_cache_phrase_item.get_phrase_length();
622     if ( index + phrase_length > constraints->len )
623         return 0;
624
625     for (size_t i = index; i < index + phrase_length; ++i){
626         clear_constraint(constraints, i);
627     }
628
629     /* store one step constraint */
630     lookup_constraint_t * constraint = &g_array_index
631         (constraints, lookup_constraint_t, index);
632     constraint->m_type = CONSTRAINT_ONESTEP;
633     constraint->m_token = token;
634
635     /* propagate no search constraint */
636     for (size_t i = 1; i < phrase_length; ++i){
637         constraint = &g_array_index(constraints, lookup_constraint_t, index + i);
638         constraint->m_type = CONSTRAINT_NOSEARCH;
639         constraint->m_constraint_step = index;
640     }
641
642     return phrase_length;
643 }
644
645 bool PinyinLookup2::clear_constraint(CandidateConstraints constraints,
646                                     int index) {
647     if (index < 0 || index >= constraints->len)
648         return false;
649
650     lookup_constraint_t * constraint = &g_array_index
651         (constraints, lookup_constraint_t, index);
652
653     if (NO_CONSTRAINT == constraint->m_type)
654         return false;
655
656     if (CONSTRAINT_NOSEARCH == constraint->m_type){
657         index = constraint->m_constraint_step;
658         constraint = &g_array_index(constraints, lookup_constraint_t, index);
659     }
660
661     /* now var constraint points to the one step constraint. */
662     assert(constraint->m_type == CONSTRAINT_ONESTEP);
663
664     phrase_token_t token = constraint->m_token;
665     if (m_phrase_index->get_phrase_item(token, m_cache_phrase_item))
666         return false;
667
668     size_t phrase_length = m_cache_phrase_item.get_phrase_length();
669     for ( size_t i = 0; i < phrase_length; ++i){
670         if (index + i >= constraints->len)
671             continue;
672
673         constraint = &g_array_index
674             (constraints, lookup_constraint_t, index + i);
675         constraint->m_type = NO_CONSTRAINT;
676     }
677
678     return true;
679 }
680
681 bool PinyinLookup2::validate_constraint(CandidateConstraints constraints,
682                                         ChewingKeyVector keys) {
683     /* resize constraints array first */
684     size_t constraints_length = constraints->len;
685
686     if ( keys->len > constraints_length ){
687         g_array_set_size(constraints, keys->len);
688
689         /* initialize new element */
690         for( size_t i = constraints_length; i < keys->len; ++i){
691             lookup_constraint_t * constraint = &g_array_index(constraints, lookup_constraint_t, i);
692             constraint->m_type = NO_CONSTRAINT;
693         }
694
695     }else if (keys->len < constraints_length ){
696         /* just shrink it */
697         g_array_set_size(constraints, keys->len);
698     }
699
700     for ( size_t i = 0; i < constraints->len; ++i){
701         lookup_constraint_t * constraint = &g_array_index
702             (constraints, lookup_constraint_t, i);
703
704         /* handle one step constraint */
705         if ( constraint->m_type == CONSTRAINT_ONESTEP ){
706
707             phrase_token_t token = constraint->m_token;
708             m_phrase_index->get_phrase_item(token, m_cache_phrase_item);
709             size_t phrase_length = m_cache_phrase_item.get_phrase_length();
710
711             /* clear too long constraint */
712             if (i + phrase_length > constraints->len){
713                 clear_constraint(constraints, i);
714                 continue;
715             }
716
717             ChewingKey * pinyin_keys = (ChewingKey *)keys->data;
718             /* clear invalid pinyin */
719             gfloat pinyin_poss = m_cache_phrase_item.get_pronunciation_possibility(m_options, pinyin_keys + i);
720             if (pinyin_poss < FLT_EPSILON)
721                 clear_constraint(constraints, i);
722         }
723     }
724     return true;
725 }