add special phrase index handle

author Peng Wu <alexepico@gmail.com>

Tue, 17 Aug 2010 04:32:04 +0000 (12:32 +0800)

committer Peng Wu <alexepico@gmail.com>

Tue, 17 Aug 2010 04:34:05 +0000 (12:34 +0800)
author Peng Wu <alexepico@gmail.com>
Tue, 17 Aug 2010 04:32:04 +0000 (12:32 +0800)
committer Peng Wu <alexepico@gmail.com>
Tue, 17 Aug 2010 04:34:05 +0000 (12:34 +0800)
diff --git a/utils/storage/export_interpolation.cpp b/utils/storage/export_interpolation.cpp

index b6dd86f044c8c71524eb229d17addd1e13380048..5cf0e0664c7a14447aec419585de9bd82d4d7527 100644 (file)
--- a/utils/storage/export_interpolation.cpp
+++ b/utils/storage/export_interpolation.cpp
@@ -70,6 +70,14 @@ void gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) {
              assert( result == ERROR_OK);
  
              size_t freq = item.get_unigram_frequency();
+            /* deal with the special phrase index, for "<start>..." */
+            if ( i == 0 ) {
+                const char * phrase = token_to_string(j);
+                if ( NULL == phrase )
+                    continue;
+                fprintf(output, "\\item %s %d\n", phrase, freq);
+                continue;
+            }
              item.get_phrase_string(buffer);
              guint8 length = item.get_phrase_length();
              gchar * phrase = g_utf16_to_utf8(buffer, length, NULL, NULL, NULL);
@@ -82,3 +90,23 @@ void gen_unigram(FILE * output, FacadePhraseIndex * phrase_index) {
  void gen_bigram(FILE * output, Bigram * bigram){
  
  }
+
+const char * token_to_string(phrase_token_t token){
+    struct token_pair{
+        phrase_token_t token;
+        const char * string;
+    };
+
+    static const token_pair tokens [] = {
+        {sentence_start, "<start>"},
+        {0, NULL}
+    };
+
+    const token_pair * pair = tokens;
+    while (pair->token) {
+        if ( token == pair->token )
+            return pair->string;
+    }
+
+    return NULL;
+}
author	Peng Wu <alexepico@gmail.com>
	Tue, 17 Aug 2010 04:32:04 +0000 (12:32 +0800)
committer	Peng Wu <alexepico@gmail.com>
	Tue, 17 Aug 2010 04:34:05 +0000 (12:34 +0800)