use binary format of phrase large table
author: Peng Wu <alexepico@gmail.com>
Tue, 10 May 2011 05:50:55 +0000 (13:50 +0800)
committer: Peng Wu <alexepico@gmail.com>
Tue, 10 May 2011 05:50:55 +0000 (13:50 +0800)
utils/training/gen_deleted_ngram.cpp
utils/training/gen_ngram.cpp

index 6db511d..cb1c4a0 100644 (file)
@@ -60,20 +60,9 @@ int main(int argc, char * argv[]){
     
     g_phrases = new PhraseLargeTable;
     //init phrase lookup
-    FILE * gb_file = fopen("../../data/gb_char.table", "r");
-    if ( gb_file == NULL ){
-       fprintf(stderr, "can't open gb_char.table!\n");
-       exit(ENOENT);
-    }
-    g_phrases->load_text(gb_file);
-    fclose(gb_file);
-    FILE * gbk_file = fopen("../../data/gbk_char.table", "r");
-    if ( gbk_file == NULL ){
-       fprintf(stderr, "can't open gbk_char.table!\n");
-       exit(ENOENT);
-    }
-    g_phrases->load_text(gbk_file);
-    fclose(gbk_file);
+    MemoryChunk * chunk = new MemoryChunk;
+    chunk->load("../../data/phrase_index.bin");
+    g_phrases->load(chunk);
 
     Bigram bigram;
     bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE);
index 71ec3f8..228abca 100644 (file)
@@ -60,25 +60,14 @@ int main(int argc, char * argv[]){
     
     g_phrases = new PhraseLargeTable;
     //init phrase lookup
-    FILE * gb_file = fopen("../../data/gb_char.table", "r");
-    if ( gb_file == NULL ){
-       fprintf(stderr, "can't open gb_char.table!\n");
-       exit(ENOENT);
-    }
-    g_phrases->load_text(gb_file);
-    fclose(gb_file);
-    FILE * gbk_file = fopen("../../data/gbk_char.table", "r");
-    if ( gbk_file == NULL ){
-       fprintf(stderr, "can't open gbk_char.table!\n");
-       exit(ENOENT);
-    }
-    g_phrases->load_text(gbk_file);
-    fclose(gbk_file);
+    MemoryChunk * chunk = new MemoryChunk;
+    chunk->load("../../data/phrase_index.bin");
+    g_phrases->load(chunk);
 
     FacadePhraseIndex phrase_index;
     
     //gb_char binary file
-    MemoryChunk * chunk = new MemoryChunk;
+    chunk = new MemoryChunk;
     chunk->load("../../data/gb_char.bin");
     phrase_index.load(1, chunk);