From 54b399a04fff283dec5299fc9f1d66985456754d Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Mon, 25 Apr 2011 15:54:42 +0800
Subject: [PATCH] define k mixture model bi-gram

---
Reviewer note (ignored by git am): this copy of the patch had been
collapsed onto a single line and every <...> span had been stripped.
The template argument lists of the FlexibleBigram / FlexibleSingleGram
typedefs are restored below so that the hunk again carries the 40 added
lines its header declares; the arguments are the three header/item
structs introduced by this same hunk -- confirm against
flexible_ngram.h.  Indentation and the position of blank context lines
are reconstructed and should be checked against the target file.

 utils/training/k_mixture_model.h | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/utils/training/k_mixture_model.h b/utils/training/k_mixture_model.h
index c482a2c..0b19af6 100644
--- a/utils/training/k_mixture_model.h
+++ b/utils/training/k_mixture_model.h
@@ -24,6 +24,7 @@
 #define K_MIXTURE_MODEL
 
 #include "novel_types.h"
+#include "flexible_ngram.h"
 
 namespace pinyin{
 
@@ -104,6 +105,46 @@ static inline parameter_t compute_Pr_G_2_with_count(corpus_count_t k,
     return compute_Pr_G_2(k, alpha, B);
 }
 
+typedef struct{
+    /* the total number of instances of all words. */
+    guint32 m_WC;
+    /* the total number of documents. */
+    guint32 m_N;
+} KMixtureModelMagicHeader;
+
+typedef struct{
+    /* the total number of instances of word W1. */
+    guint32 m_WC;
+} KMixtureModelArrayHeader;
+
+typedef struct{
+    /* the total number of all W1,W2 word pair. */
+    guint32 m_WC;
+
+    /* the total number of instances of the word or phrases.
+       (two word phrase) */
+    guint32 m_T; /* alias of m_WC, always the same. */
+    /* n_r: the number of documents having exactly r occurrences. */
+    guint32 m_n_0;
+    guint32 m_n_1;
+
+    /* maximum instances of the word or phrase (two word phrase)
+       in previous documents last seen. */
+    guint32 m_Mr;
+} KMixtureModelArrayItem;
+
+typedef FlexibleBigram<KMixtureModelMagicHeader,
+                       KMixtureModelArrayHeader,
+                       KMixtureModelArrayItem>
+KMixtureModelBigram;
+
+typedef FlexibleSingleGram<KMixtureModelArrayHeader,
+                           KMixtureModelArrayItem>
+KMixtureModelSingleGram;
+
+typedef KMixtureModelSingleGram::ArrayItemWithToken
+KMixtureModelArrayItemWithToken;
+
 };
 
 
-- 
2.7.4