refine attach method in flexible n-gram
author Peng Wu <alexepico@gmail.com>
Thu, 5 May 2011 03:13:34 +0000 (11:13 +0800)
committer Peng Wu <alexepico@gmail.com>
Thu, 5 May 2011 03:13:34 +0000 (11:13 +0800)
src/include/novel_types.h
src/storage/flexible_ngram.h
src/storage/ngram.h
tests/storage/test_flexible_ngram.cpp
utils/training/estimate_k_mixture_model.cpp
utils/training/prune_k_mixture_model.cpp

index 82a99476454fe67ca471b8e933b77c60ec692cde..9f1a9faa71f8758501f4a5f6534bc31080c5d452 100644 (file)
@@ -82,7 +82,7 @@ enum RemoveIndexResult{
 };
 
 /* For Phrase Index */
-enum PhraseIndexError{
+enum PhraseIndexResult{
     ERROR_OK = 0,                /* operate ok */
     ERROR_NO_SUB_PHRASE_INDEX,   /* sub phrase index is not loaded */
     ERROR_NO_ITEM,               /* item has a null slot */
@@ -91,6 +91,13 @@ enum PhraseIndexError{
     ERROR_INTEGER_OVERFLOW       /* integer is overflowed */
 };
 
+/* For N-gram */
+enum ATTACH_FLAG{
+    ATTACH_READONLY = 1,
+    ATTACH_READWRITE = 0x1 << 1,
+    ATTACH_CREATE = 0x1 << 2,
+};
+
 /*
  *  n-gram Definition
  *  no B parameter(there are duplicated items in uni-gram and bi-gram)
index 6f3c7787abf40c3a37d8ef73ef41201c7f2418bc..e5a086b00d4a72f4ca6fec5b50d09746198e15d7 100644 (file)
@@ -275,18 +275,26 @@ public:
     }
 
     /* attach berkeley db on filesystem for training purpose. */
-    bool attach(const char * dbfile){
+    bool attach(const char * dbfile, guint32 flags){
         reset();
+        u_int32_t db_flags = 0;
+
+        if ( flags & ATTACH_READONLY )
+            db_flags |= DB_RDONLY;
+        if ( flags & ATTACH_READWRITE )
+            assert( !(flags & ATTACH_READONLY ) );
+
         if ( !dbfile )
             return false;
         int ret = db_create(&m_db, NULL, 0);
         if ( ret != 0 )
             assert(false);
 
-        ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, 0, 0644);
-        if ( ret != 0 ) {
+        ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644);
+        if ( ret != 0 && (flags & ATTACH_CREATE) ) {
+            db_flags |= DB_CREATE;
             /* Create database file here, and write the signature. */
-            ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, DB_CREATE, 0644);
+            ret = m_db->open(m_db, NULL, dbfile, NULL, DB_HASH, db_flags, 0644);
             if ( ret != 0 )
                 return false;
 
index 93e6ad76516671dade6ddf9a12994ef3980d7b68..a2bc7b67d4d7e120a39aa18bed88f886e2b77800 100644 (file)
 
 namespace pinyin{
 
-enum {
-    ATTACH_READONLY = 1,
-    ATTACH_READWRITE = 0x1 << 1,
-    ATTACH_CREATE = 0x1 << 2,
-};
-
 class Bigram;
 
 /* Note:
index 8852f591f126d69ff21b7f226354d722ab2f8860..85a36acebbdd50755b75a3d8055fcc0659c70d9a 100644 (file)
@@ -36,7 +36,7 @@ int main(int argc, char * argv[]) {
     assert(freq == total_freq);
 
     FlexibleBigram<guint32, guint32, guint32> bigram("TEST");
-    assert(bigram.attach("/tmp/training.db"));
+    assert(bigram.attach("/tmp/training.db", ATTACH_READWRITE|ATTACH_CREATE));
     bigram.store(1, &single_gram);
     assert(single_gram.insert_array_item(5, 8));
     assert(single_gram.remove_array_item(1, freq));
index 12c6ac5bdc33aba1d896fbf38ed35709b8e62236..58d6b7070bc50eac08f77b1338233b1a9a188d1d 100644 (file)
@@ -121,10 +121,10 @@ int main(int argc, char * argv[]){
 
     /* TODO: magic header signature check here. */
     KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
-    bigram.attach(bigram_filename);
+    bigram.attach(bigram_filename, ATTACH_READONLY);
 
     KMixtureModelBigram deleted_bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
-    deleted_bigram.attach(deleted_bigram_filename);
+    deleted_bigram.attach(deleted_bigram_filename, ATTACH_READONLY);
 
     GArray * deleted_items = g_array_new(FALSE, FALSE, sizeof(phrase_token_t));
     deleted_bigram.get_all_items(deleted_items);
index 7a724a9cc6e1725a102ff3d3798a34db337d62cc..09243f3a10ac24f516ad939b20f686ec019e5e8b 100644 (file)
@@ -90,7 +90,7 @@ int main(int argc, char * argv[]){
 
     /* TODO: magic header signature check here. */
     KMixtureModelBigram bigram(K_MIXTURE_MODEL_MAGIC_NUMBER);
-    bigram.attach(bigram_filename);
+    bigram.attach(bigram_filename, ATTACH_READWRITE);
 
     KMixtureModelMagicHeader magic_header;
     bigram.get_magic_header(magic_header);