From: Minje Ahn Date: Tue, 29 Mar 2022 02:46:17 +0000 (+0900) Subject: Add plugin APIs for ebook db X-Git-Tag: accepted/tizen/unified/20220501.223620^0 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fded5a0003873d09f23fc233c7342f8083d1b961;p=platform%2Fcore%2Fmultimedia%2Flibmedia-service.git Add plugin APIs for ebook db Change-Id: I117d58beba7872bc227da43fdad7937f1bbfdc03 Signed-off-by: Minje Ahn --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 738a8d7..924d079 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,7 +53,7 @@ EXEC_PROGRAM("${UNAME}" ARGS "-m" OUTPUT_VARIABLE "ARCH") INCLUDE(FindPkgConfig) -pkg_check_modules(pkgs REQUIRED glib-2.0 dlog sqlite3 icu-i18n libexif mm-fileinfo libmedia-utils aul uuid vconf libxml-2.0 libzip capi-system-info mmutil-magick media-thumbnail libpodofo) +pkg_check_modules(pkgs REQUIRED glib-2.0 dlog sqlite3 icu-i18n libexif mm-fileinfo libmedia-utils aul uuid vconf libxml-2.0 libzip capi-system-info mmutil-magick media-thumbnail libpodofo iniparser) FOREACH(flag ${pkgs_CFLAGS}) SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} ${flag}") diff --git a/include/media-svc.h b/include/media-svc.h index 94bb2fc..3c523c5 100755 --- a/include/media-svc.h +++ b/include/media-svc.h @@ -75,7 +75,7 @@ int media_svc_send_query(uid_t uid); int media_svc_get_media_type(const char *path, int *mediatype); int media_svc_create_thumbnail(const char *file_path, int media_type, uid_t uid, char **thumbnail_path); -int media_svc_get_book_by_keyword(sqlite3 *handle, const char *keyword, GList **result); +int media_svc_get_book_by_keyword(sqlite3 *handle, const char *keyword, uid_t uid, GList **result); #ifdef __cplusplus } diff --git a/packaging/libmedia-service.spec b/packaging/libmedia-service.spec index 523e17e..97ceb64 100644 --- a/packaging/libmedia-service.spec +++ b/packaging/libmedia-service.spec @@ -1,6 +1,6 @@ Name: libmedia-service Summary: Media information service library for multimedia applications -Version: 0.4.21 +Version: 0.4.22 Release: 0 Group: Multimedia/Libraries License: Apache-2.0 and PD @@ -27,6 +27,7 @@ BuildRequires: pkgconfig(capi-system-info) BuildRequires: pkgconfig(mmutil-magick) BuildRequires: pkgconfig(media-thumbnail) BuildRequires: pkgconfig(libpodofo) +BuildRequires: pkgconfig(iniparser) %if 0%{?gtests:1} BuildRequires: pkgconfig(gmock) diff --git a/plugin/media-ebook-plugin.cpp b/plugin/media-ebook-plugin.cpp index 49ea76b..57776ac 100644 --- a/plugin/media-ebook-plugin.cpp +++ b/plugin/media-ebook-plugin.cpp @@ -29,31 +29,97 @@ #include #include +#include +#include #ifdef LOG_TAG #undef LOG_TAG #endif #define LOG_TAG "MEDIA_SERVICE" +#define INSERT_QUERY "INSERT INTO words(file_id, word) SELECT id, ? FROM files WHERE path=? ON CONFLICT (file_id, word) DO UPDATE SET frequency=frequency+1;" +#define TOKEN_KEY "\\s+" +#define SPECIAL_CHAR "[\\{\\}\\[\\]\\/?.,;:|\\)*~`!^\\-_+<>@\\#$%&\\\\=\\(\\\'\\\"]" class TextFinderInterface { public: virtual ~TextFinderInterface() = default; virtual bool find(const char *keyword) = 0; + virtual void insert() = 0; }; -/*---------------- PDF -----------------------*/ +class TextFinder : public TextFinderInterface +{ +public: + virtual ~TextFinder() = default; + bool match(std::string& text, const char *keyword); + void batchInsert(std::string& text); + + sqlite3 *dbHandle {}; + const char *filePath {}; +}; -class PdfTextFinder : public TextFinderInterface +bool TextFinder::match(std::string& text, const char *keyword) +{ + if (!keyword) + return false; + + if (text.empty()) + return false; + + std::regex re(keyword, std::regex::icase); + + if (std::regex_search(text, re)) { + LOGD("Found [%s]", keyword); + return true; + } + + return false; +} + +void TextFinder::batchInsert(std::string& text) +{ + if (!dbHandle || !filePath || text.empty()) + return; + + sqlite3_stmt *stmt = NULL; + const std::regex sp(SPECIAL_CHAR); + std::string temp = std::regex_replace(text, sp, ""); + bool isTransaction = false; + + if (sqlite3_exec(dbHandle, "BEGIN;", NULL, NULL, NULL) == SQLITE_OK) + isTransaction = true; + + sqlite3_prepare_v2(dbHandle, INSERT_QUERY, -1, &stmt, NULL); + + const std::regex re(TOKEN_KEY); + std::sregex_token_iterator end; + + for (std::sregex_token_iterator i(temp.begin(), temp.end(), re, -1); i != end; ++i) { + sqlite3_bind_text(stmt, 1, (*i).str().c_str(), -1, SQLITE_TRANSIENT); + sqlite3_bind_text(stmt, 2, filePath, -1, SQLITE_TRANSIENT); + sqlite3_step(stmt); + sqlite3_reset(stmt); + } + + sqlite3_finalize(stmt); + + if (isTransaction) + sqlite3_exec(dbHandle, "COMMIT;", NULL, NULL, NULL); +} + +/*---------------- PDF -----------------------*/ +class PdfTextFinder : public TextFinder { public: explicit PdfTextFinder(const char *path); + PdfTextFinder(sqlite3 *handle, const char *path); bool find(const char *keyword) override; + void insert() override; private: std::string parseTextFromPage(unsigned int index); - bool match(std::string& text, const char *keyword); bool loaded {}; PoDoFo::PdfMemDocument pdf {}; @@ -76,6 +142,30 @@ PdfTextFinder::PdfTextFinder(const char *path) } } +PdfTextFinder::PdfTextFinder(sqlite3 *handle, const char *path) +{ + if (!handle) { + LOGE("invalid handle"); + return; + } + + if (!path) { + LOGE("invalid path"); + return; + } + + LOGD("%s", path); + + try { + pdf.Load(path); + loaded = true; + dbHandle = handle; + filePath = path; + } catch (const PoDoFo::PdfError& e) { + LOGE("Initialization failed : %s", e.what()); + } +} + bool PdfTextFinder::find(const char *keyword) { if (!loaded) @@ -95,6 +185,17 @@ bool PdfTextFinder::find(const char *keyword) return false; } +void PdfTextFinder::insert() +{ + if (!loaded) + return; + + for (int n = 0; n < pdf.GetPageCount(); ++n) { + auto text = parseTextFromPage(n); + batchInsert(text); + } +} + std::string PdfTextFinder::parseTextFromPage(unsigned int index) { std::string fullText; @@ -173,41 +274,25 @@ std::string PdfTextFinder::parseTextFromPage(unsigned int index) return fullText; } -/* ToDo : match can be passed to EbookText */ -bool PdfTextFinder::match(std::string& text, const char *keyword) -{ - if (!keyword) - return false; - - if (text.empty()) - return false; - - std::regex re(keyword, std::regex::icase); - - if (std::regex_search(text, re)) { - LOGD("Found [%s]", keyword); - return true; - } - - return false; -} - /*---------------- EPUB -----------------------*/ -class EpubTextFinder : public TextFinderInterface +class EpubTextFinder : public TextFinder { public: explicit EpubTextFinder(const char *path); + EpubTextFinder(sqlite3 *handle, const char *path); bool find(const char *keyword) override; + void insert() override; ~EpubTextFinder() override; private: - bool match(const char *text, const char *keyword); bool htmlNodeFindRecursive(xmlNodePtr node, const char *keyword); + void htmlNodeFindRecursiveForDb(xmlNodePtr node); bool htmlFind(const char *html_buf, int buf_size, const char *keyword); + void htmlFindForDb(const char *html_buf, int buf_size); - zip_t *z{}; + zip_t *z {}; }; EpubTextFinder::EpubTextFinder(const char *path) @@ -225,6 +310,29 @@ EpubTextFinder::EpubTextFinder(const char *path) LOGE("zip_open failed"); } +EpubTextFinder::EpubTextFinder(sqlite3 *handle, const char *path) +{ + if (!handle) { + LOGE("invalid handle"); + return; + } + + if (!path) { + LOGE("invalid path"); + return; + } + + LOGD("%s", path); + + int err = 0; + z = zip_open(path, ZIP_RDONLY, &err); + if (err != 0) + LOGE("zip_open failed"); + + dbHandle = handle; + filePath = path; +} + EpubTextFinder::~EpubTextFinder() { if (!z) @@ -268,30 +376,40 @@ bool EpubTextFinder::find(const char *keyword) return false; } - -bool EpubTextFinder::match(const char *text, const char *keyword) +void EpubTextFinder::insert() { - if (!keyword) - return false; + zip_stat_t sb = {0, }; - if (!text) - return false; + int entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED); + for (int i = 0; i < entry_len; i++) { + if (!g_str_has_suffix(zip_get_name(z, i, ZIP_FL_ENC_GUESS), "html")) + continue; - std::regex re(keyword, std::regex::icase); + if (zip_stat_index(z, i, 0, &sb) != 0) + continue; - if (std::regex_search(text, re)) { - LOGD("Found [%s]", keyword); - return true; - } + zip_file_t *file = zip_fopen_index(z, i, 0); + if (!file) + continue; - return false; + std::vector file_buf(sb.size); + + zip_int64_t readn = zip_fread(file, file_buf.data(), sb.size); + zip_fclose(file); + + if (readn == static_cast(sb.size)) + htmlFindForDb(file_buf.data(), sb.size); + } } bool EpubTextFinder::htmlNodeFindRecursive(xmlNodePtr node, const char *keyword) { for (xmlNodePtr cur = node; cur; cur = cur->next) { - if (cur->type == XML_TEXT_NODE && match((const char *)cur->content, keyword)) - return true; + if (cur->type == XML_TEXT_NODE) { + std::string text(reinterpret_cast(cur->content)); + if (match(text, keyword)) + return true; + } if (htmlNodeFindRecursive(cur->children, keyword)) return true; @@ -300,6 +418,32 @@ bool EpubTextFinder::htmlNodeFindRecursive(xmlNodePtr node, const char *keyword) return false; } +void EpubTextFinder::htmlNodeFindRecursiveForDb(xmlNodePtr node) +{ + for (xmlNodePtr cur = node; cur; cur = cur->next) { + if (cur->type == XML_TEXT_NODE) { + std::string text(reinterpret_cast(cur->content)); + batchInsert(text); + } + + htmlNodeFindRecursiveForDb(cur->children); + } +} + +void EpubTextFinder::htmlFindForDb(const char *html_buf, int buf_size) +{ + htmlDocPtr doc = htmlReadMemory(html_buf, buf_size, "/", NULL, + HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET); + + if (!doc) { + LOGE("htmlReadMemory failed"); + return; + } + + htmlNodeFindRecursiveForDb(xmlDocGetRootElement(doc)); + xmlFreeDoc(doc); +} + bool EpubTextFinder::htmlFind(const char *html_buf, int buf_size, const char *keyword) { htmlDocPtr doc = htmlReadMemory(html_buf, buf_size, "/", NULL, @@ -332,3 +476,17 @@ extern "C" bool media_svc_epub_is_keyword_included(const char *path, const char return ebookText->find(keyword); } + +extern "C" void media_svc_pdf_insert_to_db(sqlite3 *handle, const char *path) +{ + std::unique_ptr ebookText = std::make_unique(handle, path); + + ebookText->insert(); +} + +extern "C" void media_svc_epub_insert_to_db(sqlite3 *handle, const char *path) +{ + std::unique_ptr ebookText = std::make_unique(handle, path); + + ebookText->insert(); +} diff --git a/src/common/media-svc-media.c b/src/common/media-svc-media.c index 87b576e..16414c0 100755 --- a/src/common/media-svc-media.c +++ b/src/common/media-svc-media.c @@ -497,4 +497,4 @@ int _media_svc_get_noti_info(sqlite3 *handle, const char *path, media_svc_noti_i SQLITE3_FINALIZE(sql_stmt); return MS_MEDIA_ERR_NONE; -} +} \ No newline at end of file diff --git a/src/common/media-svc-util.c b/src/common/media-svc-util.c index 829d892..0f754bb 100644 --- a/src/common/media-svc-util.c +++ b/src/common/media-svc-util.c @@ -1606,3 +1606,178 @@ bool _media_svc_is_keyword_included(const char *path, const char *keyword) return ret; } + +static int __media_svc_create_wordbook_db(const char *path, sqlite3 **handle) +{ + int ret = SQLITE_OK; + sqlite3 *db_handle = NULL; + char *err = NULL; + + ret = sqlite3_open_v2(path, &db_handle, SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE, NULL); + media_svc_retvm_if(ret != SQLITE_OK, ret, "sqlite3_open_v2 failed : %d", ret); + + ret = sqlite3_exec(db_handle, "PRAGMA journal_mode = OFF;", NULL, NULL, &err); + if (ret != SQLITE_OK) + goto ERROR; + + ret = sqlite3_exec(db_handle, "CREATE TABLE IF NOT EXISTS files(id integer primary key autoincrement, path text unique, validity integer default 1);", NULL, NULL, &err); + if (ret != SQLITE_OK) + goto ERROR; + + ret = sqlite3_exec(db_handle, "CREATE TABLE IF NOT EXISTS words(file_id integer, word text, frequency integer default 1, unique(file_id, word));", NULL, NULL, &err); + if (ret != SQLITE_OK) + goto ERROR; + + ret = sqlite3_exec(db_handle, "CREATE TRIGGER IF NOT EXISTS TR_files_words DELETE ON files BEGIN DELETE FROM words WHERE file_id = old.id;END;", NULL, NULL, &err); + if (ret != SQLITE_OK) + goto ERROR; + + *handle = db_handle; + + return SQLITE_OK; + +ERROR: + media_svc_error("sqlite3_exec failed : %s", err); + SQLITE3_SAFE_FREE(err); + sqlite3_close_v2(db_handle); + + return ret; +} + +static bool __media_svc_get_wordbook_handle(uid_t uid, sqlite3 **handle) +{ + int ret = SQLITE_OK; + char *db_path = NULL; + + ms_user_get_wordbook_db_path(uid, &db_path); + if (!db_path) + return false; + + ret = sqlite3_open_v2(db_path, handle, SQLITE_OPEN_READWRITE, NULL); + if (ret != SQLITE_OK) { + ret = __media_svc_create_wordbook_db(db_path, handle); + free(db_path); + media_svc_retvm_if(ret != SQLITE_OK, false, "__media_svc_create_wordbook_db failed : %d", ret); + } else { + ret = sqlite3_exec(*handle, "PRAGMA journal_mode = OFF;", NULL, NULL, NULL); + if (ret != SQLITE_OK) + media_svc_error("Failed to change journal mode [%d]", ret); + } + + return true; +} + +static bool __media_svc_is_exist_in_wordbook(sqlite3 *db_handle, const char *path) +{ + int ret = SQLITE_OK; + char *err = NULL; + char *query = NULL; + + query = sqlite3_mprintf("UPDATE files SET validity=1 WHERE path = %Q", path); + + ret = sqlite3_exec(db_handle, query, NULL, NULL, &err); + SQLITE3_SAFE_FREE(query); + if (ret != SQLITE_OK) { + media_svc_error("Query failed. [%s]", err); + SQLITE3_SAFE_FREE(err); + return false; + } + + return sqlite3_changes(db_handle) > 0 ? true : false; +} + +static void __media_svc_insert_to_wordbook(sqlite3 *db_handle, const char *path) +{ + void *handle = NULL; + void (*svc_update) (sqlite3 *, const char *); + char *query = NULL; + + query = sqlite3_mprintf("INSERT INTO files(path) VALUES(%Q);", path); + sqlite3_exec(db_handle, query, NULL, NULL, NULL); + sqlite3_free(query); + + handle = dlopen(PATH_PLUGIN_LIB, RTLD_LAZY); + if (!handle) { + media_svc_error("dlopen failed"); + return; + } + + if (g_str_has_suffix(path, "epub") || g_str_has_suffix(path, "EPUB")) + svc_update = dlsym(handle, "media_svc_epub_insert_to_db"); + else + svc_update = dlsym(handle, "media_svc_pdf_insert_to_db"); + + if (!svc_update) { + media_svc_error("dlsym failed - %s", dlerror()); + dlclose(handle); + return; + } + + svc_update(db_handle, path); + dlclose(handle); +} + +void _media_svc_update_wordbook(const char *path, uid_t uid) +{ + sqlite3 *db_handle = NULL; + + if (!path) { + media_svc_error("Invalid path"); + return; + } + + // check db.. + if (!__media_svc_get_wordbook_handle(uid, &db_handle)) + return; + + if (__media_svc_is_exist_in_wordbook(db_handle, path)) { + sqlite3_close_v2(db_handle); + return; + } + + // if no item, insert to db.. + __media_svc_insert_to_wordbook(db_handle, path); + sqlite3_close_v2(db_handle); +} + +void _media_svc_clean_wordbook(uid_t uid) +{ + sqlite3 *db_handle = NULL; + + if (!__media_svc_get_wordbook_handle(uid, &db_handle)) + return; + + sqlite3_exec(db_handle, "DELETE FROM files where validity = 0;", NULL, NULL, NULL); + sqlite3_exec(db_handle, "UPDATE files SET validity = 0;", NULL, NULL, NULL); + sqlite3_close_v2(db_handle); +} + +bool _media_svc_get_matched_list(const char *keyword, uid_t uid, GList **list) +{ + int ret = SQLITE_OK; + sqlite3 *handle = NULL; + sqlite3_stmt *stmt = NULL; + char *query = NULL; + + media_svc_retvm_if(!list, false, "list is NULL"); + media_svc_retvm_if(!keyword, false, "keyword is NULL"); + media_svc_retvm_if(!__media_svc_get_wordbook_handle(uid, &handle), false, "Failed to get handle"); + + query = sqlite3_mprintf("SELECT files.path FROM files JOIN (SELECT file_id, sum(frequency) AS freq_sum FROM words WHERE word LIKE '%q%%' GROUP BY file_id ORDER BY freq_sum DESC) w ON files.id = w.file_id;", keyword); + ret = sqlite3_prepare_v2(handle, query, -1, &stmt, NULL); + SQLITE3_SAFE_FREE(query); + + if (ret != SQLITE_OK) { + media_svc_error("Query failed[%d]", ret); + sqlite3_close_v2(handle); + return false; + } + + while (sqlite3_step(stmt) == SQLITE_ROW) + *list = g_list_append(*list, g_strdup((char *)sqlite3_column_text(stmt, 0))); + + sqlite3_finalize(stmt); + sqlite3_close_v2(handle); + + return true; +} \ No newline at end of file diff --git a/src/common/media-svc.c b/src/common/media-svc.c index d238e09..a652e4c 100755 --- a/src/common/media-svc.c +++ b/src/common/media-svc.c @@ -29,6 +29,10 @@ #include "media-svc-noti.h" #include "media-svc-storage.h" +#include + +#define CONTENT_INI_DEFAULT_PATH SYSCONFDIR"/multimedia/media_content_config.ini" + //static __thread int g_media_svc_data_cnt = 0; static __thread int g_media_svc_cur_data_cnt = 0; @@ -782,13 +786,33 @@ int media_svc_create_thumbnail(const char *file_path, int media_type, uid_t uid, return ret; } -int media_svc_get_book_by_keyword(sqlite3 *handle, const char *keyword, GList **result) +static int __media_svc_get_ebook_search_type(void) +{ + dictionary *dict = NULL; + static int _ebook_search_type = -1; + + if (_ebook_search_type == -1) { + dict = iniparser_load(CONTENT_INI_DEFAULT_PATH); + if (!dict) { + media_svc_error("%s load failed. Use direct search.", CONTENT_INI_DEFAULT_PATH); + return MEDIA_SVC_SEARCH_TYPE_DIRECT; + } + + _ebook_search_type = iniparser_getint(dict, "media-content-config:ebook_search_type", 0); + media_svc_debug("ebook_search_type [%d]", _ebook_search_type); + + iniparser_freedict(dict); + } + + return _ebook_search_type; +} + +int media_svc_get_book_by_keyword(sqlite3 *handle, const char *keyword, uid_t uid, GList **result) { int ret = MS_MEDIA_ERR_NONE; GList *item_list = NULL; GList *iter = NULL; char *query = NULL; - char *tmp_path = NULL; media_svc_retvm_if(!handle, MS_MEDIA_ERR_INVALID_PARAMETER, "db handle is NULL"); media_svc_retvm_if(!keyword, MS_MEDIA_ERR_INVALID_PARAMETER, "keyword is NULL"); @@ -800,11 +824,19 @@ int media_svc_get_book_by_keyword(sqlite3 *handle, const char *keyword, GList ** ret = _media_svc_get_media(handle, query, &item_list); media_svc_retvm_if(ret != MS_MEDIA_ERR_NONE, ret, "_media_svc_get_media failed"); - for (iter = item_list; iter; iter = g_list_next(iter)) { - tmp_path = (char *)iter->data; + if (__media_svc_get_ebook_search_type() == MEDIA_SVC_SEARCH_TYPE_DB) { + for (iter = item_list; iter; iter = g_list_next(iter)) + _media_svc_update_wordbook((char *)iter->data, uid); - if (_media_svc_is_keyword_included(tmp_path, keyword)) - *result = g_list_append(*result, g_strdup(tmp_path)); + _media_svc_clean_wordbook(uid); + + if (!_media_svc_get_matched_list(keyword, uid, result)) + media_svc_error("_media_svc_get_matched_list failed"); + } else { + for (iter = item_list; iter; iter = g_list_next(iter)) { + if (_media_svc_is_keyword_included((char *)iter->data, keyword)) + *result = g_list_append(*result, g_strdup((gchar *)iter->data)); + } } g_list_free_full(item_list, g_free); diff --git a/src/include/common/media-svc-util.h b/src/include/common/media-svc-util.h index ad3c5b2..5d1c912 100755 --- a/src/include/common/media-svc-util.h +++ b/src/include/common/media-svc-util.h @@ -125,6 +125,11 @@ typedef enum { MEDIA_SVC_MEDIA_TYPE_BOOK = 5, /**< Book Content like epub*/ } media_svc_media_type_e; +typedef enum { + MEDIA_SVC_SEARCH_TYPE_DIRECT = 0, + MEDIA_SVC_SEARCH_TYPE_DB, +} media_svc_search_type_e; + char * _media_info_generate_uuid(void); void _media_svc_remove_file(const char *path); int _media_svc_get_thumbnail_path(char *thumb_path, const char *pathname, const char *img_format, uid_t uid); @@ -142,6 +147,9 @@ int _media_svc_extract_music_metadata_for_update(media_svc_content_info_s *conte int _media_svc_get_media_type(const char *path, int *mediatype); bool _media_svc_is_valid_storage_type(ms_user_storage_type_e storage_type); bool _media_svc_is_keyword_included(const char *path, const char *keyword); +void _media_svc_update_wordbook(const char *path, uid_t uid); +void _media_svc_clean_wordbook(uid_t uid); +bool _media_svc_get_matched_list(const char *keyword, uid_t uid, GList **list); #ifdef __cplusplus }