3 * Library to deal with pinyin.
5 * Copyright (C) 2011 Peng Wu <alexepico@gmail.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/* Global lookup engine for this test program: created in main() with
 * `new PhraseLookup(...)`, used by try_phrase_lookup(), and deleted again
 * at the end of main(). It is NULL until main() has assigned it. */
27 PhraseLookup * g_phrase_lookup = NULL;
30 printf("Usage: test_phrase_lookup\n");
/*
 * Runs the global g_phrase_lookup over one UTF-16 sentence and prints the
 * outcome: the converted UTF-8 string on stdout, or an error message on
 * stderr.
 *
 *   utf16     - the sentence to look up
 *   utf16_len - length handed to get_best_match() together with utf16
 *
 * NOTE(review): only part of this function is visible in this excerpt; the
 * branch conditions, the return statement and the closing brace are not
 * shown, so the meaning of the bool result cannot be stated from here.
 */
33 bool try_phrase_lookup(utf16_t * utf16, glong utf16_len){
34 char * result_string = NULL;
/* GArray of phrase_token_t elements (not zero-terminated, not cleared) that
 * is handed to get_best_match(); presumably filled with the matched
 * tokens -- confirm against PhraseLookup. */
35 MatchResults results = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
36 g_phrase_lookup->get_best_match(utf16_len, utf16, results);
/* Debug dump of "index:token" pairs for the entries of `results`.
 * NOTE(review): `i` is a size_t but is printed with %d below; %zu is the
 * matching conversion. The underlying type of phrase_token_t is not visible
 * here -- confirm that %d matches it as well. */
38 for ( size_t i = 0; i < results->len; ++i) {
39 phrase_token_t * token = &g_array_index(results, phrase_token_t, i);
40 if ( *token == null_token )
42 printf("%d:%d\t", i, *token);
/* Converts the token list to UTF-8 using "\n" as the delimiter argument.
 * result_string is passed without '&', so it is presumably taken by
 * reference and set by the callee -- TODO confirm. */
46 g_phrase_lookup->convert_to_utf8(results, "\n", result_string);
/* Success output vs. failure diagnostic; the condition choosing between the
 * two lines is not visible in this excerpt. */
48 printf("%s\n", result_string);
50 fprintf(stderr, "Error: Un-segmentable sentence encountered!\n");
/* Release the token array (including its element storage) and the UTF-8
 * string; g_free() accepts NULL, which is result_string's initial value. */
51 g_array_free(results, TRUE);
52 g_free(result_string);
/*
 * Test driver: loads the phrase table, the phrase indexes and the bigram
 * database from ../../data, builds the global PhraseLookup, then reads
 * sentences from stdin line by line and passes each one to
 * try_phrase_lookup().
 *
 * NOTE(review): many lines of this function are missing from this excerpt
 * (argument loop, several declarations, loop/branch bodies, return), so the
 * comments below describe only the statements that are visible.
 */
56 int main(int argc, char * argv[]){
/* Adopt the locale configured in the environment. */
59 setlocale(LC_ALL, "");
/* Command-line check for "--help"; the loop that defines `i` and the action
 * taken on a match are not visible here. */
62 if ( strcmp ("--help", argv[i]) == 0 ){
/* Phrase table, loaded from a freshly allocated MemoryChunk.
 * NOTE(review): neither chunk->load() nor the table/index load() results are
 * checked in the visible lines, and the chunks are never deleted here --
 * presumably ownership passes to the object that loads them; confirm. */
74 PhraseLargeTable phrase_table;
75 MemoryChunk * chunk = new MemoryChunk;
76 chunk->load("../../data/phrase_index.bin");
77 phrase_table.load(chunk);
/* Phrase index: gb_char.bin is loaded under index 1 and gbk_char.bin under
 * index 2, each from its own MemoryChunk. */
80 FacadePhraseIndex phrase_index;
81 chunk = new MemoryChunk;
82 chunk->load("../../data/gb_char.bin");
83 phrase_index.load(1, chunk);
84 chunk = new MemoryChunk;
85 chunk->load("../../data/gbk_char.bin");
86 phrase_index.load(2, chunk);
/* Attach the system bigram database with the ATTACH_READONLY flag. The
 * declarations of system_bigram and user_bigram are not visible here, and
 * the result of attach() is not checked in the visible lines. */
90 system_bigram.attach("../../data/bigram.db", ATTACH_READONLY);
/* Build the global lookup object from the four components above; it is
 * deleted on the last visible line of main(). */
94 g_phrase_lookup = new PhraseLookup(&phrase_table, &phrase_index,
95 &system_bigram, &user_bigram);
/* getline() (re)allocates linebuf as needed; starting from NULL lets it
 * allocate the first buffer. `read` and `size` are declared on lines not
 * shown in this excerpt. */
98 char * linebuf = NULL;
101 while( (read = getline(&linebuf, &size, stdin)) != -1 ){
/* Strip the trailing newline that getline() keeps.
 * NOTE(review): strlen(linebuf) - 1 assumes a non-empty string; a line that
 * starts with an embedded NUL byte would make the index wrap around. */
102 if ( '\n' == linebuf[strlen(linebuf) - 1] ) {
103 linebuf[strlen(linebuf) - 1] = '\0';
/* The literal line "quit" is treated specially; the statement executed on a
 * match is not visible here (presumably it leaves the loop -- confirm). */
106 if ( strcmp ( linebuf, "quit" ) == 0)
109 //check non-ucs2 characters
/* Compare the number of UTF-8 characters with the number of UTF-16 units
 * written by the conversion. The two counts differ when a character needs
 * more than one UTF-16 unit (a surrogate pair), and such input is rejected
 * with the message below. `len` is declared on a line not shown.
 * NOTE(review): the buffer returned by g_utf8_to_utf16() must be released
 * with g_free(); no release is visible in this excerpt -- confirm that it
 * happens on both the accept and the reject path. */
110 const glong num_of_chars = g_utf8_strlen(linebuf, -1);
112 utf16_t * sentence = g_utf8_to_utf16(linebuf, -1, NULL, &len, NULL);
113 if ( len != num_of_chars ) {
114 fprintf(stderr, "non-ucs2 characters are not accepted.\n");
/* The bool result of try_phrase_lookup() is ignored. */
119 try_phrase_lookup(sentence, len);
/* Destroy the lookup object created with `new` above. */
123 delete g_phrase_lookup;