1 /* vim:set et ts=4 sts=4:
3 * ibus-pinyin - The Chinese PinYin engine for IBus
5 * Copyright (c) 2008-2010 Peng Huang <shawn.p.huang@gmail.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 #include "PYDatabase.h"
24 #include "PYPinyinArray.h"
28 #define DB_CACHE_SIZE "5000"
29 #define DB_INDEX_SIZE (3)
31 #define DB_COLUMN_USER_FREQ (0)
32 #define DB_COLUMN_PHRASE (1)
33 #define DB_COLUMN_FREQ (2)
34 #define DB_COLUMN_S0 (3)
36 #define DB_PREFETCH_LEN (6)
38 boost::scoped_ptr<Database> Database::m_instance;
40 class Conditions : public std::vector<std::string> {
42 Conditions (void) : std::vector<std::string> (1) {}
54 const std::string & value = std::vector<std::string>::at (--i);
60 void appendVPrintf (gint begin, gint end, const gchar *fmt, va_list args) {
62 g_vsnprintf (str, sizeof(str), fmt, args);
63 for (gint i = begin; i < end; i++) {
68 void appendPrintf (gint begin, gint end, const gchar *fmt, ...) {
71 appendVPrintf (begin, end, fmt, args);
79 : m_db (db), m_stmt (NULL) {
80 g_assert (m_db != NULL);
85 if (sqlite3_finalize (m_stmt) != SQLITE_OK) {
86 g_warning ("destroy sqlite stmt failed!");
91 gboolean prepare (const String &sql) {
92 if (sqlite3_prepare (m_db,
97 g_warning ("parse sql failed!\n %s", sql.c_str ());
104 gboolean step (void) {
105 switch (sqlite3_step (m_stmt)) {
111 g_warning ("sqlites step error!");
116 const gchar *columnText (guint col) {
117 return (const gchar *) sqlite3_column_text (m_stmt, col);
120 gint columnInt (guint col) {
121 return sqlite3_column_int (m_stmt, col);
126 sqlite3_stmt *m_stmt;
129 Query::Query (const PinyinArray & pinyin,
134 m_pinyin_begin (pinyin_begin),
135 m_pinyin_len (pinyin_len),
138 g_assert (m_pinyin.size () >= pinyin_begin + pinyin_len);
146 Query::fill (PhraseArray &phrases, gint count)
150 while (m_pinyin_len > 0) {
151 if (G_LIKELY (m_stmt.get () == NULL)) {
152 m_stmt = Database::instance ().query (m_pinyin, m_pinyin_begin, m_pinyin_len, -1, m_option);
153 g_assert (m_stmt.get () != NULL);
156 while (m_stmt->step ()) {
159 g_strlcpy (phrase.phrase,
160 m_stmt->columnText (DB_COLUMN_PHRASE),
161 sizeof (phrase.phrase));
162 phrase.freq = m_stmt->columnInt (DB_COLUMN_FREQ);
163 phrase.user_freq = m_stmt->columnInt (DB_COLUMN_USER_FREQ);
164 phrase.len = m_pinyin_len;
166 for (guint i = 0, column = DB_COLUMN_S0; i < m_pinyin_len; i++) {
167 phrase.pinyin_id[i].sheng = m_stmt->columnInt (column++);
168 phrase.pinyin_id[i].yun = m_stmt->columnInt (column++);
171 phrases.push_back (phrase);
173 if (G_UNLIKELY (row == count)) {
185 Database::Database (void)
191 Database::~Database (void)
194 if (sqlite3_close (m_db) != SQLITE_OK) {
195 g_warning ("close sqlite database failed!");
201 Database::executeSQL (const gchar *sql)
204 if (sqlite3_exec (m_db, sql, NULL, NULL, &errmsg) != SQLITE_OK) {
205 g_warning ("%s: %s", errmsg, sql);
206 sqlite3_free (errmsg);
213 Database::open (void)
217 #if (SQLITE_VERSION_NUMBER >= 3006000)
218 sqlite3_initialize ();
221 static const gchar * maindb [] = {
222 PKGDATADIR"/db/local.db",
223 PKGDATADIR"/db/open-phrase.db",
224 PKGDATADIR"/db/android.db",
229 for (i = 0; i < G_N_ELEMENTS (maindb); i++) {
230 if (!g_file_test(maindb[i], G_FILE_TEST_IS_REGULAR))
232 if (sqlite3_open_v2 (maindb[i], &m_db,
233 SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, NULL) == SQLITE_OK) {
234 g_message ("Use database %s", maindb[i]);
239 if (i == G_N_ELEMENTS (maindb)) {
240 g_warning ("can not open main database");
247 /* Set synchronous=OFF, write user database will become much faster.
248 * It will cause user database corrupted, if the operatering system
249 * crashes or computer loses power.
251 m_sql << "PRAGMA synchronous=NORMAL;\n";
254 /* Set the cache size for better performance */
255 m_sql << "PRAGMA cache_size=" DB_CACHE_SIZE ";\n";
257 /* Using memory for temp store */
258 m_sql << "PRAGMA temp_store=MEMORY;\n";
260 /* Set journal mode */
261 m_sql << "PRAGMA journal_mode=PERSIST;\n";
263 /* Using EXCLUSIVE locking mode on databases
264 * for better performance */
265 m_sql << "PRAGMA locking_mode=EXCLUSIVE;\n";
266 if (!executeSQL (m_sql))
269 /* Attach user database */
270 m_buffer = g_get_user_cache_dir ();
271 m_buffer << G_DIR_SEPARATOR_S << "ibus"
272 << G_DIR_SEPARATOR_S << "pinyin";
273 g_mkdir_with_parents (m_buffer, 0750);
274 m_buffer << G_DIR_SEPARATOR_S << "user-1.3.db";
275 retval = openUserDB (m_buffer);
277 g_warning ("Can not open user database %s", m_buffer.c_str ());
278 if (!openUserDB (":memory:"))
282 /* prefetch some tables */
289 sqlite3_close (m_db);
296 Database::openUserDB (const gchar *userdb)
298 m_sql.printf ("ATTACH DATABASE \"%s\" AS userdb;", userdb);
299 if (!executeSQL (m_sql))
302 m_sql = "BEGIN TRANSACTION;\n";
303 /* create desc table*/
304 m_sql << "CREATE TABLE IF NOT EXISTS userdb.desc (name PRIMARY KEY, value TEXT);\n";
305 m_sql << "INSERT OR IGNORE INTO userdb.desc VALUES " << "('version', '1.2.0');\n"
306 << "INSERT OR IGNORE INTO userdb.desc VALUES " << "('uuid', '" << UUID () << "');\n"
307 << "INSERT OR IGNORE INTO userdb.desc VALUES " << "('hostname', '" << Hostname () << "');\n"
308 << "INSERT OR IGNORE INTO userdb.desc VALUES " << "('username', '" << Env ("USERNAME") << "');\n"
309 << "INSERT OR IGNORE INTO userdb.desc VALUES " << "('create-time', datetime());\n"
310 << "INSERT OR IGNORE INTO userdb.desc VALUES " << "('attach-time', datetime());\n";
312 /* create phrase tables */
313 for (guint i = 0; i < MAX_PHRASE_LEN; i++) {
314 m_sql.appendPrintf ("CREATE TABLE IF NOT EXISTS userdb.py_phrase_%d (user_freq, phrase TEXT, freq INTEGER ", i);
315 for (guint j = 0; j <= i; j++)
316 m_sql.appendPrintf (",s%d INTEGER, y%d INTEGER", j, j);
321 m_sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << "userdb.index_0_0 ON py_phrase_0(s0,y0,phrase);\n";
322 m_sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << "userdb.index_1_0 ON py_phrase_1(s0,y0,s1,y1,phrase);\n";
323 m_sql << "CREATE INDEX IF NOT EXISTS " << "userdb.index_1_1 ON py_phrase_1(s0,s1,y1);\n";
324 for (guint i = 2; i < MAX_PHRASE_LEN; i++) {
325 m_sql << "CREATE UNIQUE INDEX IF NOT EXISTS " << "userdb.index_" << i << "_0 ON py_phrase_" << i
327 for (guint j = 1; j <= i; j++)
328 m_sql << ",s" << j << ",y" << j;
329 m_sql << ",phrase);\n";
330 m_sql << "CREATE INDEX IF NOT EXISTS " << "userdb.index_" << i << "_1 ON py_phrase_" << i << "(s0,s1,s2,y2);\n";
334 if (!executeSQL (m_sql))
337 m_sql = "UPDATE userdb.desc SET value=datetime() WHERE name='attach-time';";
339 if (!executeSQL (m_sql))
345 m_sql = "DETACH DATABASE userdb;";
351 Database::prefetch (void)
354 for (guint i = 0; i < DB_PREFETCH_LEN; i++)
355 m_sql << "SELECT * FROM py_phrase_" << i << ";\n";
357 // g_debug ("prefetching ...");
362 inline static gboolean
363 pinyin_option_check_sheng (guint option, gint id, gint fid)
365 switch ((id << 16) | fid) {
366 case (PINYIN_ID_C << 16) | PINYIN_ID_CH:
367 return (option & PINYIN_FUZZY_C_CH);
368 case (PINYIN_ID_CH << 16) | PINYIN_ID_C:
369 return (option & PINYIN_FUZZY_CH_C);
370 case (PINYIN_ID_Z << 16) | PINYIN_ID_ZH:
371 return (option & PINYIN_FUZZY_Z_ZH);
372 case (PINYIN_ID_ZH << 16) | PINYIN_ID_Z:
373 return (option & PINYIN_FUZZY_ZH_Z);
374 case (PINYIN_ID_S << 16) | PINYIN_ID_SH:
375 return (option & PINYIN_FUZZY_S_SH);
376 case (PINYIN_ID_SH << 16) | PINYIN_ID_S:
377 return (option & PINYIN_FUZZY_SH_S);
378 case (PINYIN_ID_L << 16) | PINYIN_ID_N:
379 return (option & PINYIN_FUZZY_L_N);
380 case (PINYIN_ID_N << 16) | PINYIN_ID_L:
381 return (option & PINYIN_FUZZY_N_L);
382 case (PINYIN_ID_F << 16) | PINYIN_ID_H:
383 return (option & PINYIN_FUZZY_F_H);
384 case (PINYIN_ID_H << 16) | PINYIN_ID_F:
385 return (option & PINYIN_FUZZY_H_F);
386 case (PINYIN_ID_L << 16) | PINYIN_ID_R:
387 return (option & PINYIN_FUZZY_L_R);
388 case (PINYIN_ID_R << 16) | PINYIN_ID_L:
389 return (option & PINYIN_FUZZY_R_L);
390 case (PINYIN_ID_K << 16) | PINYIN_ID_G:
391 return (option & PINYIN_FUZZY_K_G);
392 case (PINYIN_ID_G << 16) | PINYIN_ID_K:
393 return (option & PINYIN_FUZZY_G_K);
394 default: return FALSE;
398 inline static gboolean
399 pinyin_option_check_yun (guint option, gint id, gint fid)
401 switch ((id << 16) | fid) {
402 case (PINYIN_ID_AN << 16) | PINYIN_ID_ANG:
403 return (option & PINYIN_FUZZY_AN_ANG);
404 case (PINYIN_ID_ANG << 16) | PINYIN_ID_AN:
405 return (option & PINYIN_FUZZY_ANG_AN);
406 case (PINYIN_ID_EN << 16) | PINYIN_ID_ENG:
407 return (option & PINYIN_FUZZY_EN_ENG);
408 case (PINYIN_ID_ENG << 16) | PINYIN_ID_EN:
409 return (option & PINYIN_FUZZY_ENG_EN);
410 case (PINYIN_ID_IN << 16) | PINYIN_ID_ING:
411 return (option & PINYIN_FUZZY_IN_ING);
412 case (PINYIN_ID_ING << 16) | PINYIN_ID_IN:
413 return (option & PINYIN_FUZZY_ING_IN);
414 case (PINYIN_ID_IAN << 16) | PINYIN_ID_IANG:
415 return (option & PINYIN_FUZZY_IAN_IANG);
416 case (PINYIN_ID_IANG << 16) | PINYIN_ID_IAN:
417 return (option & PINYIN_FUZZY_IANG_IAN);
418 case (PINYIN_ID_UAN << 16) | PINYIN_ID_UANG:
419 return (option & PINYIN_FUZZY_UAN_UANG);
420 case (PINYIN_ID_UANG << 16) | PINYIN_ID_UAN:
421 return (option & PINYIN_FUZZY_UANG_UAN);
422 default: return FALSE;
427 Database::query (const PinyinArray &pinyin,
433 g_assert (pinyin_begin < pinyin.size ());
434 g_assert (pinyin_len <= pinyin.size () - pinyin_begin);
435 g_assert (pinyin_len <= MAX_PHRASE_LEN);
438 Conditions conditions;
440 for (guint i = 0; i < pinyin_len; i++) {
443 p = pinyin[i + pinyin_begin];
445 fs1 = pinyin_option_check_sheng (option, p->pinyin_id[0].sheng, p->pinyin_id[1].sheng);
446 fs2 = pinyin_option_check_sheng (option, p->pinyin_id[0].sheng, p->pinyin_id[2].sheng);
448 if (G_LIKELY (i > 0))
449 conditions.appendPrintf (0, conditions.size (),
452 if (G_UNLIKELY (fs1 || fs2)) {
453 if (G_LIKELY (i < DB_INDEX_SIZE)) {
454 if (fs1 && fs2 == 0) {
455 conditions.double_ ();
456 conditions.appendPrintf (0, conditions.size () >> 1,
457 "s%d=%d", i, p->pinyin_id[0].sheng);
458 conditions.appendPrintf (conditions.size () >> 1, conditions.size (),
459 "s%d=%d", i, p->pinyin_id[1].sheng);
461 else if (fs1 == 0 && fs2) {
462 conditions.double_ ();
463 conditions.appendPrintf (0, conditions.size () >> 1,
464 "s%d=%d", i, p->pinyin_id[0].sheng);
465 conditions.appendPrintf (conditions.size () >> 1, conditions.size (),
466 "s%d=%d", i, p->pinyin_id[2].sheng);
469 gint len = conditions.size ();
470 conditions.triple ();
471 conditions.appendPrintf (0, len,
472 "s%d=%d", i, p->pinyin_id[0].sheng);
473 conditions.appendPrintf (len, len << 1,
474 "s%d=%d", i, p->pinyin_id[1].sheng);
475 conditions.appendPrintf (len << 1, conditions.size (),
476 "s%d=%d", i, p->pinyin_id[2].sheng);
480 if (fs1 && fs2 == 0) {
481 conditions.appendPrintf (0, conditions.size (),
482 "s%d IN (%d,%d)", i, p->pinyin_id[0].sheng, p->pinyin_id[1].sheng);
484 else if (fs1 == 0 && fs2) {
485 conditions.appendPrintf (0, conditions.size (),
486 "s%d IN (%d,%d)", i, p->pinyin_id[0].sheng, p->pinyin_id[2].sheng);
489 conditions.appendPrintf (0, conditions.size (),
490 "s%d IN (%d,%d,%d)", i, p->pinyin_id[0].sheng, p->pinyin_id[1].sheng, p->pinyin_id[2].sheng);
495 conditions.appendPrintf (0, conditions.size (),
496 "s%d=%d", i, p->pinyin_id[0].sheng);
499 if (p->pinyin_id[0].yun != PINYIN_ID_ZERO) {
500 if (pinyin_option_check_yun (option, p->pinyin_id[0].yun, p->pinyin_id[1].yun)) {
501 if (G_LIKELY (i < DB_INDEX_SIZE)) {
502 conditions.double_ ();
503 conditions.appendPrintf (0, conditions.size () >> 1,
504 " AND y%d=%d", i, p->pinyin_id[0].yun);
505 conditions.appendPrintf (conditions.size () >> 1, conditions.size (),
506 " and y%d=%d", i, p->pinyin_id[1].yun);
509 conditions.appendPrintf (0, conditions.size (),
510 " AND y%d IN (%d,%d)", i, p->pinyin_id[0].yun, p->pinyin_id[1].yun);
514 conditions.appendPrintf (0, conditions.size (),
515 " AND y%d=%d", i, p->pinyin_id[0].yun);
522 for (guint i = 0; i < conditions.size (); i++) {
523 if (G_UNLIKELY (i == 0))
524 m_buffer << " (" << conditions[i] << ")\n";
526 m_buffer << " OR (" << conditions[i] << ")\n";
530 gint id = pinyin_len - 1;
531 m_sql << "SELECT * FROM ("
532 "SELECT 0 AS user_freq, * FROM main.py_phrase_" << id << " WHERE " << m_buffer << " UNION ALL "
533 "SELECT * FROM userdb.py_phrase_" << id << " WHERE " << m_buffer << ") "
534 "GROUP BY phrase ORDER BY user_freq DESC, freq DESC";
536 m_sql << " LIMIT " << m;
538 g_debug ("sql =\n%s", m_sql.c_str ());
542 SQLStmtPtr stmt (new SQLStmt (m_db));
544 if (!stmt->prepare (m_sql)) {
552 Database::phraseWhereSql (const Phrase & p, String & sql)
555 sql << " s0=" << p.pinyin_id[0].sheng
556 << " AND y0=" << p.pinyin_id[0].yun;
557 for (guint i = 1; i < p.len; i++) {
558 sql << " AND s" << i << '=' << p.pinyin_id[i].sheng
559 << " AND y" << i << '=' << p.pinyin_id[i].yun;
561 sql << " AND phrase=\"" << p.phrase << "\"";
566 Database::phraseSql (const Phrase & p, String & sql)
568 sql << "INSERT OR IGNORE INTO userdb.py_phrase_" << p.len - 1
569 << " VALUES(" << 0 /* user_freq */
570 << ",\"" << p.phrase << '"' /* phrase */
571 << ',' << p.freq; /* freq */
573 for (guint i = 0; i < p.len; i++) {
574 sql << ',' << p.pinyin_id[i].sheng << ',' << p.pinyin_id[i].yun;
579 sql << "UPDATE userdb.py_phrase_" << p.len - 1
580 << " SET user_freq=user_freq+1";
582 phraseWhereSql (p, sql);
587 Database::commit (const PhraseArray &phrases)
589 Phrase phrase = {""};
591 m_sql = "BEGIN TRANSACTION;\n";
592 for (guint i = 0; i < phrases.size (); i++) {
593 phrase += phrases[i];
594 phraseSql (phrases[i], m_sql);
596 if (phrases.size () > 1)
597 phraseSql (phrase, m_sql);
598 m_sql << "COMMIT;\n";
604 Database::remove (const Phrase & phrase)
606 m_sql = "BEGIN TRANSACTION;\n";
607 m_sql << "DELETE FROM userdb.py_phrase_" << phrase.len - 1;
608 phraseWhereSql (phrase, m_sql);
610 m_sql << "COMMIT;\n";
616 Database::init (void)
618 if (m_instance == NULL) {
619 m_instance.reset (new Database ());