Refactoring ebook plugin 49/273249/16
author Seungbae Shin <seungbae.shin@samsung.com>
Fri, 1 Apr 2022 07:14:23 +0000 (16:14 +0900)
committer Seungbae Shin <seungbae.shin@samsung.com>
Wed, 4 May 2022 03:05:27 +0000 (12:05 +0900)
- Apply strategy pattern
- Separate files for classes and interfaces

Change-Id: Icd06ad64b41ba7f799fa445cb12e2a6af9c99fbf

CMakeLists.txt
plugin/media-ebook-plugin-dbinserter.cpp [new file with mode: 0644]
plugin/media-ebook-plugin-dbinserter.h [new file with mode: 0644]
plugin/media-ebook-plugin-epub.cpp [new file with mode: 0644]
plugin/media-ebook-plugin-epub.h [new file with mode: 0644]
plugin/media-ebook-plugin-interface.h [new file with mode: 0644]
plugin/media-ebook-plugin-pdf.cpp [new file with mode: 0644]
plugin/media-ebook-plugin-pdf.h [new file with mode: 0644]
plugin/media-ebook-plugin-regmatch.cpp [new file with mode: 0644]
plugin/media-ebook-plugin-regmatch.h [new file with mode: 0644]
plugin/media-ebook-plugin.cpp

index 924d079..9d74a25 100644 (file)
@@ -26,6 +26,10 @@ SET(HASH_SRCS
 
 SET(EBOOKPLUGIN_SRCS
        plugin/media-ebook-plugin.cpp
+       plugin/media-ebook-plugin-regmatch.cpp
+       plugin/media-ebook-plugin-dbinserter.cpp
+       plugin/media-ebook-plugin-pdf.cpp
+       plugin/media-ebook-plugin-epub.cpp
        )
 SET(CONTENTPLUGIN_SRCS
        plugin/media-content-plugin.c
diff --git a/plugin/media-ebook-plugin-dbinserter.cpp b/plugin/media-ebook-plugin-dbinserter.cpp
new file mode 100644 (file)
index 0000000..73a2d3a
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <dlog.h>
+#include <regex>
+#include <sqlite3.h>
+#include "media-ebook-plugin-dbinserter.h"
+
+#ifdef LOG_TAG
+#undef LOG_TAG
+#endif
+
+#define LOG_TAG "MEDIA_SERVICE"
+
+#define INSERT_QUERY "INSERT INTO words(file_id, word) SELECT id, ? FROM files WHERE path=? ON CONFLICT (file_id, word) DO UPDATE SET frequency=frequency+1;"
+#define TOKEN_KEY "\\s+"
+#define SPECIAL_CHAR "[\\{\\}\\[\\]\\/?.,;:|\\)*~`!^\\-_+<>@\\#$%&\\\\=\\(\\\'\\\"]"
+
+/*
+ * Splits the given text into words and submits each one for this file.
+ *
+ * Characters matching SPECIAL_CHAR are removed first, then the remainder is
+ * split on runs of whitespace (TOKEN_KEY). Every non-empty word is bound to
+ * INSERT_QUERY together with filePath and executed, inside one transaction
+ * when BEGIN succeeds.
+ *
+ * text   : text to tokenize
+ * return : false when the handle, path or text is missing, or when the
+ *          statement cannot be prepared; true once all words were submitted
+ */
+bool DbInserter::run(const std::string& text)
+{
+       if (!dbHandle || filePath.empty() || text.empty())
+               return false;
+
+       auto sqlite_handle = static_cast<sqlite3 *>(const_cast<void *>(dbHandle));
+
+       /* Compiling std::regex is costly and both patterns are constant: build them once. */
+       static const std::regex sp(SPECIAL_CHAR);
+       static const std::regex re(TOKEN_KEY);
+
+       const std::string _text = std::regex_replace(text, sp, "");
+
+       bool isTransaction = (sqlite3_exec(sqlite_handle, "BEGIN;", nullptr, nullptr, nullptr) == SQLITE_OK);
+
+       sqlite3_stmt *stmt = nullptr;
+       if (sqlite3_prepare_v2(sqlite_handle, INSERT_QUERY, -1, &stmt, nullptr) != SQLITE_OK || !stmt) {
+               /* Nothing can be inserted; do not leave the transaction open or report success. */
+               sqlite3_finalize(stmt);
+               if (isTransaction)
+                       sqlite3_exec(sqlite_handle, "ROLLBACK;", nullptr, nullptr, nullptr);
+               return false;
+       }
+
+       /* sqlite3_reset() keeps existing bindings, so the path is bound a single time. */
+       sqlite3_bind_text(stmt, 2, filePath.c_str(), -1, SQLITE_TRANSIENT);
+
+       for (std::sregex_token_iterator i(_text.begin(), _text.end(), re, -1), end; i != end; ++i) {
+               /* Leading whitespace or fully stripped text yields an empty token; skip it. */
+               if (i->length() == 0)
+                       continue;
+
+               const std::string word = i->str();
+               sqlite3_bind_text(stmt, 1, word.c_str(), -1, SQLITE_TRANSIENT);
+               sqlite3_step(stmt);
+               sqlite3_reset(stmt);
+       }
+
+       sqlite3_finalize(stmt);
+
+       if (isTransaction)
+               sqlite3_exec(sqlite_handle, "COMMIT;", nullptr, nullptr, nullptr);
+
+       return true;
+}
diff --git a/plugin/media-ebook-plugin-dbinserter.h b/plugin/media-ebook-plugin-dbinserter.h
new file mode 100644 (file)
index 0000000..57e1f07
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef __MEDIA_EBOOK_PLUGIN_DBINSERTER_H__
+#define __MEDIA_EBOOK_PLUGIN_DBINSERTER_H__
+
+#include <string>
+#include "media-ebook-plugin-interface.h"
+
+/*
+ * IRunnable that is constructed with a file path and an opaque database handle.
+ * run() is implemented in media-ebook-plugin-dbinserter.cpp.
+ */
+class DbInserter : public IRunnable
+{
+public:
+       /*
+        * path   : stored as filePath
+        * handle : stored as dbHandle; only the pointer is kept and the defaulted
+        *          destructor does not release it
+        */
+       DbInserter(std::string path, const void* handle)
+               : filePath(path), dbHandle(handle) { }
+       ~DbInserter() override = default;
+
+       /* Processes one piece of text; see the definition for the return value. */
+       bool run(const std::string& text) override;
+
+private:
+       /* Copy of the path given to the constructor. */
+       std::string filePath {};
+       /* Pointer given to the constructor; run() casts it to sqlite3*. */
+       const void *dbHandle {};
+};
+
+#endif
diff --git a/plugin/media-ebook-plugin-epub.cpp b/plugin/media-ebook-plugin-epub.cpp
new file mode 100644 (file)
index 0000000..59b969e
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "media-ebook-plugin-epub.h"
+
+#include <dlog.h>
+#include <glib.h>
+#include <vector>
+
+#ifdef LOG_TAG
+#undef LOG_TAG
+#endif
+
+#define LOG_TAG "MEDIA_SERVICE"
+
+using namespace EBook;
+
+/*
+ * Takes ownership of the runner and opens the archive at path read-only.
+ * An empty path is logged and leaves the archive handle unset.
+ */
+Epub::Epub(std::string path, std::unique_ptr<IRunnable> runner)
+       : runner(std::move(runner))
+{
+       if (!path.empty()) {
+               LOGD("%s", path.c_str());
+
+               int zip_err = 0;
+               z = zip_open(path.c_str(), ZIP_RDONLY, &zip_err);
+               if (zip_err != 0)
+                       LOGE("zip_open failed");
+       } else {
+               LOGE("invalid path");
+       }
+}
+
+/* Closes the archive if one was opened and clears the handle. */
+Epub::~Epub()
+{
+       if (z) {
+               zip_close(z);
+               z = nullptr;
+       }
+}
+
+/*
+ * Walks every archive entry whose name ends in "html", reads it completely
+ * and passes it to htmlFind().
+ *
+ * Entries that have no name, cannot be stat'ed or opened, report no usable
+ * size, or are not read in full are skipped.
+ *
+ * return : true as soon as htmlFind() reports a match, false otherwise
+ *          (including when no archive is open)
+ */
+bool Epub::find()
+{
+       if (!z)
+               return false;
+
+       /* Keep libzip's 64-bit count instead of narrowing it to int. */
+       const zip_int64_t entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED);
+       for (zip_int64_t i = 0; i < entry_len; i++) {
+               const char *name = zip_get_name(z, i, ZIP_FL_ENC_GUESS);
+               if (!name || !g_str_has_suffix(name, "html"))
+                       continue;
+
+               zip_stat_t sb {};
+               if (zip_stat_index(z, i, 0, &sb) != 0 || !(sb.valid & ZIP_STAT_SIZE))
+                       continue;
+
+               /*
+                * htmlFind() takes an int length. Skip empty entries and any entry
+                * whose size would be truncated by that conversion.
+                */
+               const int buf_size = static_cast<int>(sb.size);
+               if (buf_size <= 0 || static_cast<zip_uint64_t>(buf_size) != sb.size)
+                       continue;
+
+               zip_file_t *file = zip_fopen_index(z, i, 0);
+               if (!file)
+                       continue;
+
+               std::vector<char> file_buf(sb.size);
+
+               const zip_int64_t readn = zip_fread(file, file_buf.data(), sb.size);
+               zip_fclose(file);
+
+               if (readn == static_cast<zip_int64_t>(sb.size) &&
+                       htmlFind(file_buf.data(), buf_size))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Parses an in-memory HTML buffer and searches its node tree.
+ *
+ * html_buf : start of the HTML data
+ * buf_size : number of bytes in html_buf
+ * return   : result of htmlNodeFindRecursive() on the root element, or false
+ *            when the buffer cannot be parsed
+ */
+bool Epub::htmlFind(const char* html_buf, int buf_size)
+{
+       const int parse_opts = HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET;
+
+       htmlDocPtr doc = htmlReadMemory(html_buf, buf_size, "/", NULL, parse_opts);
+       if (doc) {
+               const bool found = htmlNodeFindRecursive(xmlDocGetRootElement(doc));
+               xmlFreeDoc(doc);
+               return found;
+       }
+
+       LOGE("htmlReadMemory failed");
+       return false;
+}
+
+/*
+ * Visits node, its following siblings and all of their descendants, passing
+ * the content of each text node to the runner.
+ *
+ * node   : first node to visit; may be null
+ * return : true as soon as the runner returns true for some text node,
+ *          false otherwise (including when no runner was supplied)
+ */
+bool Epub::htmlNodeFindRecursive(xmlNodePtr node)
+{
+       if (!runner)
+               return false;
+
+       for (xmlNodePtr cur = node; cur; cur = cur->next) {
+               /* run() takes a std::string, which must not be built from a null pointer. */
+               if (cur->type == XML_TEXT_NODE && cur->content &&
+                       runner->run(reinterpret_cast<char*>(cur->content)))
+                       return true;
+
+               if (htmlNodeFindRecursive(cur->children))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Walks every archive entry whose name ends in "html", reads it completely
+ * and passes it to htmlInsert().
+ *
+ * Entries that have no name, cannot be stat'ed or opened, report no usable
+ * size, or are not read in full are skipped. Does nothing when no archive
+ * is open.
+ */
+void Epub::insert()
+{
+       if (!z)
+               return;
+
+       /* Keep libzip's 64-bit count instead of narrowing it to int. */
+       const zip_int64_t entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED);
+       for (zip_int64_t i = 0; i < entry_len; i++) {
+               const char *name = zip_get_name(z, i, ZIP_FL_ENC_GUESS);
+               if (!name || !g_str_has_suffix(name, "html"))
+                       continue;
+
+               zip_stat_t sb {};
+               if (zip_stat_index(z, i, 0, &sb) != 0 || !(sb.valid & ZIP_STAT_SIZE))
+                       continue;
+
+               /*
+                * htmlInsert() takes an int length. Skip empty entries and any entry
+                * whose size would be truncated by that conversion.
+                */
+               const int buf_size = static_cast<int>(sb.size);
+               if (buf_size <= 0 || static_cast<zip_uint64_t>(buf_size) != sb.size)
+                       continue;
+
+               zip_file_t *file = zip_fopen_index(z, i, 0);
+               if (!file)
+                       continue;
+
+               std::vector<char> file_buf(sb.size);
+
+               const zip_int64_t readn = zip_fread(file, file_buf.data(), sb.size);
+               zip_fclose(file);
+
+               if (readn == static_cast<zip_int64_t>(sb.size))
+                       htmlInsert(file_buf.data(), buf_size);
+       }
+}
+
+/*
+ * Parses an in-memory HTML buffer and feeds its node tree to
+ * htmlNodeInsertRecursive(). A buffer that cannot be parsed is logged.
+ *
+ * html_buf : start of the HTML data
+ * buf_size : number of bytes in html_buf
+ */
+void Epub::htmlInsert(const char* html_buf, int buf_size)
+{
+       const int parse_opts = HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET;
+
+       htmlDocPtr doc = htmlReadMemory(html_buf, buf_size, "/", NULL, parse_opts);
+       if (doc) {
+               htmlNodeInsertRecursive(xmlDocGetRootElement(doc));
+               xmlFreeDoc(doc);
+               return;
+       }
+
+       LOGE("htmlReadMemory failed");
+}
+
+/*
+ * Visits node, its following siblings and all of their descendants, passing
+ * the content of each text node to the runner. The runner's result is ignored.
+ *
+ * node : first node to visit; may be null
+ */
+void Epub::htmlNodeInsertRecursive(xmlNodePtr node)
+{
+       if (!runner)
+               return;
+
+       for (xmlNodePtr cur = node; cur; cur = cur->next) {
+               /* run() takes a std::string, which must not be built from a null pointer. */
+               if (cur->type == XML_TEXT_NODE && cur->content)
+                       runner->run(reinterpret_cast<char*>(cur->content));
+
+               htmlNodeInsertRecursive(cur->children);
+       }
+}
diff --git a/plugin/media-ebook-plugin-epub.h b/plugin/media-ebook-plugin-epub.h
new file mode 100644 (file)
index 0000000..61c1d16
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef __MEDIA_EBOOK_PLUGIN_EPUB_H__
+#define __MEDIA_EBOOK_PLUGIN_EPUB_H__
+
+#include "media-ebook-plugin-interface.h"
+
+#include <string>
+#include <memory>
+
+#include <zip.h>
+#include <libxml/xmlmemory.h>
+#include <libxml/parser.h>
+#include <libxml/HTMLparser.h>
+
+namespace EBook {
+
+/*
+ * EPUB backend: opens the file as a zip archive and hands the text nodes of
+ * its "html" entries to the supplied IRunnable.
+ */
+class Epub : public IFindable, public IInsertable
+{
+public:
+       /* Opens the archive at path and takes ownership of runner. */
+       Epub(std::string path, std::unique_ptr<IRunnable> runner);
+       /* Closes the archive if it was opened. */
+       ~Epub() override;
+
+       /* Returns true as soon as the runner returns true for some text node. */
+       bool find() override;
+       /* Passes every text node to the runner and ignores its result. */
+       void insert() override;
+
+private:
+       /* Parse one HTML buffer, then walk its node tree (find variant). */
+       bool htmlFind(const char* html_buf, int buf_size);
+       bool htmlNodeFindRecursive(xmlNodePtr node);
+       /* Parse one HTML buffer, then walk its node tree (insert variant). */
+       void htmlInsert(const char* html_buf, int buf_size);
+       void htmlNodeInsertRecursive(xmlNodePtr node);
+
+       /* libzip archive handle; stays null when the constructor is given an empty path. */
+       zip_t* z {};
+       /* Strategy invoked for each piece of extracted text. */
+       std::unique_ptr<IRunnable> runner {};
+};
+
+}
+
+#endif
diff --git a/plugin/media-ebook-plugin-interface.h b/plugin/media-ebook-plugin-interface.h
new file mode 100644 (file)
index 0000000..cdd617b
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef __MEDIA_EBOOK_PLUGIN_INTERFACE_H__
+#define __MEDIA_EBOOK_PLUGIN_INTERFACE_H__
+
+#include <string>
+
+/* Interface for objects that can be searched; implemented by the Epub and Pdf backends. */
+class IFindable
+{
+public:
+       virtual ~IFindable() = default;
+       /* Returns true when the search succeeds. */
+       virtual bool find() = 0;
+};
+
+/* Interface for objects whose content can be inserted; implemented by the Epub and Pdf backends. */
+class IInsertable
+{
+public:
+       virtual ~IInsertable() = default;
+       /* Performs the insertion; no result is reported. */
+       virtual void insert() = 0;
+};
+
+/*
+ * Strategy interface applied to a piece of extracted text; implemented by
+ * DbInserter and RegMatch.
+ */
+class IRunnable
+{
+public:
+       virtual ~IRunnable() = default;
+       /* Processes text; the meaning of the result is defined by the implementation. */
+       virtual bool run(const std::string& text) = 0;
+};
+
+#endif
\ No newline at end of file
diff --git a/plugin/media-ebook-plugin-pdf.cpp b/plugin/media-ebook-plugin-pdf.cpp
new file mode 100644 (file)
index 0000000..058d317
--- /dev/null
@@ -0,0 +1,148 @@
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "media-ebook-plugin-pdf.h"
+
+#include <dlog.h>
+#include <stack>
+
+#ifdef LOG_TAG
+#undef LOG_TAG
+#endif
+
+#define LOG_TAG "MEDIA_SERVICE"
+
+using namespace EBook;
+
+/*
+ * Takes ownership of the runner and loads the document at path.
+ * loaded is set only when Load() completes without a PdfError; an empty
+ * path is logged and nothing is loaded.
+ */
+Pdf::Pdf(std::string path, std::unique_ptr<IRunnable> runner)
+       : runner(std::move(runner))
+{
+       if (!path.empty()) {
+               LOGD("%s", path.c_str());
+
+               try {
+                       pdf.Load(path.c_str());
+                       loaded = true;
+               } catch (const PoDoFo::PdfError& e) {
+                       LOGE("Initialization failed : %s", e.what());
+               }
+       } else {
+               LOGE("invalid path");
+       }
+}
+
+/*
+ * Runs the runner on the text of each page in order.
+ *
+ * return : true as soon as the runner returns true for a page; false when
+ *          no page matches or the document was not loaded
+ */
+bool Pdf::find()
+{
+       if (loaded) {
+               int page = 0;
+               while (page < pdf.GetPageCount()) {
+                       if (runner->run(parseTextFromPage(page)))
+                               return true;
+                       ++page;
+               }
+       }
+
+       return false;
+}
+
+/*
+ * Runs the runner on the text of every page, ignoring its result.
+ * Does nothing when the document was not loaded.
+ */
+void Pdf::insert()
+{
+       if (loaded) {
+               int page = 0;
+               while (page < pdf.GetPageCount()) {
+                       runner->run(parseTextFromPage(page));
+                       ++page;
+               }
+       }
+}
+
+/*
+ * Collects the text drawn on one page as a UTF-8 string.
+ *
+ * The page's content stream is tokenized. Operands seen between the "BT" and
+ * "ET" keywords are pushed on a stack; "Tf" selects the current font, and
+ * "Tj", "'", "\"" and "TJ" append their string operand(s), converted through
+ * the current font's encoding.
+ *
+ * PoDoFo reports failures by throwing PdfError (the constructor already
+ * guards Load() for that reason). A malformed page must not unwind through
+ * find()/insert(), so the error is logged and the text gathered so far is
+ * returned.
+ *
+ * index  : zero-based page number
+ * return : extracted text; empty when the page is missing or has no text
+ */
+std::string Pdf::parseTextFromPage(unsigned int index)
+{
+       std::string fullText;
+
+       try {
+               PoDoFo::EPdfContentsType type;
+               PoDoFo::PdfVariant var;
+               PoDoFo::PdfFont *cur_font = NULL;
+               bool text_block = false;
+               const char* tok;
+               std::stack<PoDoFo::PdfVariant> stack;
+               PoDoFo::PdfString unicode;
+               PoDoFo::PdfArray array;
+
+               PoDoFo::PdfPage* page = pdf.GetPage(index);
+               if (!page)
+                       return fullText;
+
+               PoDoFo::PdfContentsTokenizer tokenizer(page);
+
+               while (tokenizer.ReadNext(type, tok, var)) {
+                       /* Non-keyword tokens are operands; they only matter inside a text block. */
+                       if (type != PoDoFo::ePdfContentsType_Keyword) {
+                               if (text_block)
+                                       stack.push(var);
+
+                               continue;
+                       }
+
+                       if (!text_block && strcmp(tok, "BT") == 0) {
+                               text_block = true;
+                               continue;
+                       } else if (text_block && strcmp(tok, "ET") == 0) {
+                               text_block = false;
+                       }
+
+                       if (!text_block)
+                               continue;
+
+                       if (strcmp(tok, "Tf") == 0) {
+                               /* Expects two operands: the font name below the top entry. */
+                               if (stack.size() < 2) {
+                                       cur_font = NULL;
+                                       continue;
+                               }
+
+                               stack.pop();
+                               cur_font = pdf.GetFont(page->GetFromResources(PoDoFo::PdfName("Font"), stack.top().GetName()));
+                       } else if (strcmp(tok, "Tj") == 0 || strcmp(tok, "'") == 0 || strcmp(tok, "\"") == 0) {
+                               if (stack.empty())
+                                       continue;
+
+                               /* Without a font encoding the string cannot be converted. */
+                               if (!cur_font || !cur_font->GetEncoding())
+                                       continue;
+
+                               unicode = cur_font->GetEncoding()->ConvertToUnicode(stack.top().GetString(), cur_font);
+                               fullText += unicode.GetStringUtf8();
+
+                               stack.pop();
+                       } else if (strcmp(tok, "TJ") == 0) {
+                               if (stack.empty())
+                                       continue;
+
+                               array = stack.top().GetArray();
+                               stack.pop();
+
+                               /* Only the string elements of the array are converted and appended. */
+                               for (int i = 0; i < static_cast<int>(array.GetSize()); i++) {
+                                       if (array[i].IsString() || array[i].IsHexString()) {
+                                               if (!cur_font || !cur_font->GetEncoding())
+                                                       continue;
+
+                                               unicode = cur_font->GetEncoding()->ConvertToUnicode(array[i].GetString(), cur_font);
+                                               fullText += unicode.GetStringUtf8();
+                                       }
+                               }
+                       }
+               }
+       } catch (const PoDoFo::PdfError& e) {
+               LOGE("Failed to parse page %u : %s", index, e.what());
+       }
+
+       return fullText;
+}
diff --git a/plugin/media-ebook-plugin-pdf.h b/plugin/media-ebook-plugin-pdf.h
new file mode 100644 (file)
index 0000000..d86d5ae
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef __MEDIA_EBOOK_PLUGIN_PDF_H__
+#define __MEDIA_EBOOK_PLUGIN_PDF_H__
+
+#include <podofo/podofo.h>
+#include <string>
+#include <memory>
+#include "media-ebook-plugin-interface.h"
+
+namespace EBook {
+
+/*
+ * PDF backend: loads the document with PoDoFo and hands the text of each
+ * page to the supplied IRunnable.
+ */
+class Pdf : public IFindable, public IInsertable
+{
+public:
+       /* Loads the document at path and takes ownership of runner. */
+       Pdf(std::string path, std::unique_ptr<IRunnable> runner);
+       ~Pdf() override = default;
+
+       /* Returns true as soon as the runner returns true for some page. */
+       bool find() override;
+       /* Passes the text of every page to the runner and ignores its result. */
+       void insert() override;
+
+private:
+       /* Returns the text of the page at the given zero-based index. */
+       std::string parseTextFromPage(unsigned int index);
+
+       /* True only after the document was loaded successfully in the constructor. */
+       bool loaded {};
+       PoDoFo::PdfMemDocument pdf {};
+       /* Strategy invoked for each page's text. */
+       std::unique_ptr<IRunnable> runner {};
+};
+
+}
+
+#endif
diff --git a/plugin/media-ebook-plugin-regmatch.cpp b/plugin/media-ebook-plugin-regmatch.cpp
new file mode 100644 (file)
index 0000000..bfa060d
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#include "media-ebook-plugin-regmatch.h"
+#include <dlog.h>
+#include <regex>
+
+#ifdef LOG_TAG
+#undef LOG_TAG
+#endif
+
+#define LOG_TAG "MEDIA_SERVICE"
+
+/*
+ * Searches text for the stored keyword, interpreted as a case-insensitive
+ * regular expression.
+ *
+ * The keyword comes from the caller and may not be a valid pattern (for
+ * example "(" or "c++"). std::regex then throws std::regex_error, which is
+ * logged and reported as "no match" instead of escaping to the caller.
+ *
+ * text   : text to search
+ * return : true when the pattern is found; false when text is empty, the
+ *          pattern is invalid or nothing matches
+ */
+bool RegMatch::run(const std::string& text)
+{
+       if (text.empty())
+               return false;
+
+       try {
+               const std::regex re(keyword, std::regex::icase);
+
+               if (std::regex_search(text, re)) {
+                       LOGD("Found [%s]", keyword.c_str());
+                       return true;
+               }
+       } catch (const std::regex_error& e) {
+               LOGE("Invalid keyword [%s] : %s", keyword.c_str(), e.what());
+       }
+
+       return false;
+}
\ No newline at end of file
diff --git a/plugin/media-ebook-plugin-regmatch.h b/plugin/media-ebook-plugin-regmatch.h
new file mode 100644 (file)
index 0000000..c73240d
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef __MEDIA_EBOOK_PLUGIN_REGMATCH_H__
+#define __MEDIA_EBOOK_PLUGIN_REGMATCH_H__
+
+#include <string>
+#include "media-ebook-plugin-interface.h"
+
+/*
+ * IRunnable that holds a search keyword; run() is implemented in
+ * media-ebook-plugin-regmatch.cpp and uses the keyword as a regular expression.
+ */
+class RegMatch : public IRunnable
+{
+public:
+       /*
+        * Copies keyword into a std::string.
+        * NOTE(review): keyword is not checked for null here, and constructing a
+        * std::string from a null pointer is undefined behaviour. Confirm that
+        * every caller validates the keyword before creating a RegMatch.
+        */
+       explicit RegMatch(const char *keyword)
+               : keyword(keyword) { }
+       ~RegMatch() override = default;
+
+       /* Returns true when the keyword pattern is found in text. */
+       bool run(const std::string& text) override;
+
+private:
+       /* Pattern given to the constructor. */
+       std::string keyword {};
+};
+
+#endif
\ No newline at end of file
index 57776ac..321fce1 100644 (file)
  * limitations under the License.
  *
  */
-#include <podofo/podofo.h>
-#include <stack>
-#include <memory>
-#include <dlog.h>
-#include <glib.h>
-#include <stdbool.h>
 
-#include <zip.h>
-#include <libxml/xmlmemory.h>
-#include <libxml/parser.h>
-#include <libxml/HTMLparser.h>
+#include "media-ebook-plugin-interface.h"
+#include "media-ebook-plugin-epub.h"
+#include "media-ebook-plugin-pdf.h"
+#include "media-ebook-plugin-dbinserter.h"
+#include "media-ebook-plugin-regmatch.h"
 
-#include <regex>
-#include <iterator>
-#include <sqlite3.h>
+#include <memory>
 
 #ifdef LOG_TAG
 #undef LOG_TAG
 #endif
 
 #define LOG_TAG "MEDIA_SERVICE"
-#define INSERT_QUERY "INSERT INTO words(file_id, word) SELECT id, ? FROM files WHERE path=? ON CONFLICT (file_id, word) DO UPDATE SET frequency=frequency+1;"
-#define TOKEN_KEY "\\s+"
-#define SPECIAL_CHAR "[\\{\\}\\[\\]\\/?.,;:|\\)*~`!^\\-_+<>@\\#$%&\\\\=\\(\\\'\\\"]"
-
-class TextFinderInterface
-{
-public:
-       virtual ~TextFinderInterface() = default;
-       virtual bool find(const char *keyword) = 0;
-       virtual void insert() = 0;
-};
-
-class TextFinder : public TextFinderInterface
-{
-public:
-       virtual ~TextFinder() = default;
-       bool match(std::string& text, const char *keyword);
-       void batchInsert(std::string& text);
-
-       sqlite3 *dbHandle {};
-       const char *filePath {};
-};
-
-bool TextFinder::match(std::string& text, const char *keyword)
-{
-       if (!keyword)
-               return false;
-
-       if (text.empty())
-               return false;
-
-       std::regex re(keyword, std::regex::icase);
-
-       if (std::regex_search(text, re)) {
-               LOGD("Found [%s]", keyword);
-               return true;
-       }
-
-       return false;
-}
-
-void TextFinder::batchInsert(std::string& text)
-{
-       if (!dbHandle || !filePath || text.empty())
-               return;
-
-       sqlite3_stmt *stmt = NULL;
-       const std::regex sp(SPECIAL_CHAR);
-       std::string temp = std::regex_replace(text, sp, "");
-       bool isTransaction = false;
-
-       if (sqlite3_exec(dbHandle, "BEGIN;", NULL, NULL, NULL) == SQLITE_OK)
-               isTransaction = true;
-
-       sqlite3_prepare_v2(dbHandle, INSERT_QUERY, -1, &stmt, NULL);
-
-       const std::regex re(TOKEN_KEY);
-       std::sregex_token_iterator end;
-
-       for (std::sregex_token_iterator i(temp.begin(), temp.end(), re, -1); i != end; ++i) {
-               sqlite3_bind_text(stmt, 1, (*i).str().c_str(), -1, SQLITE_TRANSIENT);
-               sqlite3_bind_text(stmt, 2, filePath, -1, SQLITE_TRANSIENT);
-               sqlite3_step(stmt);
-               sqlite3_reset(stmt);
-       }
-
-       sqlite3_finalize(stmt);
-
-       if (isTransaction)
-               sqlite3_exec(dbHandle, "COMMIT;", NULL, NULL, NULL);
-}
-
-/*---------------- PDF -----------------------*/
-class PdfTextFinder : public TextFinder
-{
-public:
-       explicit PdfTextFinder(const char *path);
-       PdfTextFinder(sqlite3 *handle, const char *path);
-       bool find(const char *keyword) override;
-       void insert() override;
-
-private:
-       std::string parseTextFromPage(unsigned int index);
-       bool loaded {};
-
-       PoDoFo::PdfMemDocument pdf {};
-};
-
-PdfTextFinder::PdfTextFinder(const char *path)
-{
-       if (!path) {
-               LOGE("invalid path");
-               return;
-       }
-
-       LOGD("%s", path);
-
-       try {
-               pdf.Load(path);
-               loaded = true;
-       } catch (const PoDoFo::PdfError& e) {
-               LOGE("Initialization failed : %s", e.what());
-       }
-}
-
-PdfTextFinder::PdfTextFinder(sqlite3 *handle, const char *path)
-{
-       if (!handle) {
-               LOGE("invalid handle");
-               return;
-       }
-
-       if (!path) {
-               LOGE("invalid path");
-               return;
-       }
-
-       LOGD("%s", path);
-
-       try {
-               pdf.Load(path);
-               loaded = true;
-               dbHandle = handle;
-               filePath = path;
-       } catch (const PoDoFo::PdfError& e) {
-               LOGE("Initialization failed : %s", e.what());
-       }
-}
-
-bool PdfTextFinder::find(const char *keyword)
-{
-       if (!loaded)
-               return false;
-
-       if (!keyword) {
-               LOGE("Invalid keyword");
-               return false;
-       }
-
-       for (int n = 0; n < pdf.GetPageCount(); ++n) {
-               auto text = parseTextFromPage(n);
-               if (match(text, keyword))
-                       return true;
-       }
-
-       return false;
-}
-
-void PdfTextFinder::insert()
-{
-       if (!loaded)
-               return;
-
-       for (int n = 0; n < pdf.GetPageCount(); ++n) {
-               auto text = parseTextFromPage(n);
-               batchInsert(text);
-       }
-}
-
-std::string PdfTextFinder::parseTextFromPage(unsigned int index)
-{
-       std::string fullText;
-
-       PoDoFo::EPdfContentsType type;
-       PoDoFo::PdfVariant var;
-       PoDoFo::PdfFont *cur_font = NULL;
-       bool text_block = false;
-       const char *tok;
-       std::stack<PoDoFo::PdfVariant> stack;
-       PoDoFo::PdfString unicode;
-       PoDoFo::PdfArray array;
-
-       PoDoFo::PdfPage* page = pdf.GetPage(index);
-       if (!page)
-               return fullText;
-
-       PoDoFo::PdfContentsTokenizer tokenizer(page);
-
-       while (tokenizer.ReadNext(type, tok, var)) {
-               if (type != PoDoFo::ePdfContentsType_Keyword) {
-                       if (text_block)
-                               stack.push(var);
-
-                       continue;
-               }
-
-               if (!text_block && strcmp(tok, "BT") == 0) {
-                       text_block = true;
-                       continue;
-               } else if (text_block && strcmp(tok, "ET") == 0) {
-                       text_block = false;
-               }
-
-               if (!text_block)
-                       continue;
-
-               if (strcmp(tok, "Tf") == 0) {
-                       if (stack.size() < 2) {
-                               cur_font = NULL;
-                               continue;
-                       }
-
-                       stack.pop();
-                       cur_font = pdf.GetFont(page->GetFromResources(PoDoFo::PdfName("Font"), stack.top().GetName()));
-               } else if (strcmp(tok, "Tj") == 0 || strcmp(tok, "'") == 0 || strcmp(tok, "\"") == 0) {
-                       if (stack.empty())
-                               continue;
-
-                       if (!cur_font || !cur_font->GetEncoding())
-                               continue;
-
-                       unicode = cur_font->GetEncoding()->ConvertToUnicode(stack.top().GetString(), cur_font);
-                       fullText += unicode.GetStringUtf8();
-
-                       stack.pop();
-               } else if (strcmp(tok, "TJ") == 0) {
-                       if (stack.empty())
-                               continue;
-
-                       array = stack.top().GetArray();
-                       stack.pop();
-
-                       for (int i = 0; i < static_cast<int>(array.GetSize()); i++) {
-                               if (array[i].IsString() || array[i].IsHexString()) {
-                                       if (!cur_font || !cur_font->GetEncoding())
-                                               continue;
-
-                                       unicode = cur_font->GetEncoding()->ConvertToUnicode(array[i].GetString(), cur_font);
-                                       fullText += unicode.GetStringUtf8();
-                               }
-                       }
-               }
-       }
-
-       return fullText;
-}
-
-/*---------------- EPUB -----------------------*/
-
-class EpubTextFinder : public TextFinder
-{
-public:
-       explicit EpubTextFinder(const char *path);
-       EpubTextFinder(sqlite3 *handle, const char *path);
-       bool find(const char *keyword) override;
-       void insert() override;
-
-       ~EpubTextFinder() override;
-
-private:
-       bool htmlNodeFindRecursive(xmlNodePtr node, const char *keyword);
-       void htmlNodeFindRecursiveForDb(xmlNodePtr node);
-       bool htmlFind(const char *html_buf, int buf_size, const char *keyword);
-       void htmlFindForDb(const char *html_buf, int buf_size);
-
-       zip_t *z {};
-};
-
-EpubTextFinder::EpubTextFinder(const char *path)
-{
-       if (!path) {
-               LOGE("invalid path");
-               return;
-       }
-
-       LOGD("%s", path);
-
-       int err = 0;
-       z = zip_open(path, ZIP_RDONLY, &err);
-       if (err != 0)
-               LOGE("zip_open failed");
-}
-
-EpubTextFinder::EpubTextFinder(sqlite3 *handle, const char *path)
-{
-       if (!handle) {
-               LOGE("invalid handle");
-               return;
-       }
-
-       if (!path) {
-               LOGE("invalid path");
-               return;
-       }
-
-       LOGD("%s", path);
-
-       int err = 0;
-       z = zip_open(path, ZIP_RDONLY, &err);
-       if (err != 0)
-               LOGE("zip_open failed");
 
-       dbHandle = handle;
-       filePath = path;
-}
-
-EpubTextFinder::~EpubTextFinder()
-{
-       if (!z)
-               return;
-
-       zip_close(z);
-       z = nullptr;
-}
-
-bool EpubTextFinder::find(const char *keyword)
-{
-       zip_stat_t sb = {0, };
-
-       if (!keyword) {
-               LOGE("Invalid keyword");
-               return false;
-       }
-
-       int entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED);
-       for (int i = 0; i < entry_len; i++) {
-               if (!g_str_has_suffix(zip_get_name(z, i, ZIP_FL_ENC_GUESS), "html"))
-                       continue;
-
-               if (zip_stat_index(z, i, 0, &sb) != 0)
-                       continue;
-
-               zip_file_t *file = zip_fopen_index(z, i, 0);
-               if (!file)
-                       continue;
-
-               std::vector<char> file_buf(sb.size);
-
-               zip_int64_t readn = zip_fread(file, file_buf.data(), sb.size);
-               zip_fclose(file);
-
-               if ((readn == static_cast<zip_int64_t>(sb.size)) &&
-                       htmlFind(file_buf.data(), sb.size, keyword))
-                       return true;
-       }
-
-       return false;
-}
-
-void EpubTextFinder::insert()
-{
-       zip_stat_t sb = {0, };
-
-       int entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED);
-       for (int i = 0; i < entry_len; i++) {
-               if (!g_str_has_suffix(zip_get_name(z, i, ZIP_FL_ENC_GUESS), "html"))
-                       continue;
-
-               if (zip_stat_index(z, i, 0, &sb) != 0)
-                       continue;
-
-               zip_file_t *file = zip_fopen_index(z, i, 0);
-               if (!file)
-                       continue;
-
-               std::vector<char> file_buf(sb.size);
-
-               zip_int64_t readn = zip_fread(file, file_buf.data(), sb.size);
-               zip_fclose(file);
-
-               if (readn == static_cast<zip_int64_t>(sb.size))
-                       htmlFindForDb(file_buf.data(), sb.size);
-       }
-}
-
-bool EpubTextFinder::htmlNodeFindRecursive(xmlNodePtr node, const char *keyword)
-{
-       for (xmlNodePtr cur = node; cur; cur = cur->next) {
-               if (cur->type == XML_TEXT_NODE) {
-                       std::string text(reinterpret_cast<char*>(cur->content));
-                       if (match(text, keyword))
-                               return true;
-               }
-
-               if (htmlNodeFindRecursive(cur->children, keyword))
-                       return true;
-       }
-
-       return false;
-}
-
-void EpubTextFinder::htmlNodeFindRecursiveForDb(xmlNodePtr node)
-{
-       for (xmlNodePtr cur = node; cur; cur = cur->next) {
-               if (cur->type == XML_TEXT_NODE) {
-                       std::string text(reinterpret_cast<char*>(cur->content));
-                       batchInsert(text);
-               }
-
-               htmlNodeFindRecursiveForDb(cur->children);
-       }
-}
-
-void EpubTextFinder::htmlFindForDb(const char *html_buf, int buf_size)
-{
-       htmlDocPtr doc = htmlReadMemory(html_buf, buf_size, "/", NULL,
-                                                                       HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
-
-       if (!doc) {
-               LOGE("htmlReadMemory failed");
-               return;
-       }
-
-       htmlNodeFindRecursiveForDb(xmlDocGetRootElement(doc));
-       xmlFreeDoc(doc);
-}
-
-bool EpubTextFinder::htmlFind(const char *html_buf, int buf_size, const char *keyword)
-{
-       htmlDocPtr doc = htmlReadMemory(html_buf, buf_size, "/", NULL,
-                                                                       HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
-
-       if (!doc) {
-               LOGE("htmlReadMemory failed");
-               return false;
-       }
-
-       bool result = htmlNodeFindRecursive(xmlDocGetRootElement(doc), keyword);
-
-       xmlFreeDoc(doc);
-
-       return result;
-}
+using namespace std;
 
 extern "C" bool media_svc_pdf_is_keyword_included(const char *path, const char *keyword)
 {
-       /* ToDo: factory pattern */
-       std::unique_ptr<TextFinderInterface> ebookText = std::make_unique<PdfTextFinder>(path);
-
-       return ebookText->find(keyword);
+       return unique_ptr<IFindable>{ make_unique<EBook::Pdf>(path, make_unique<RegMatch>(keyword)) }->find();
 }
 
 extern "C" bool media_svc_epub_is_keyword_included(const char *path, const char *keyword)
 {
-       /* ToDo: factory pattern */
-       std::unique_ptr<TextFinderInterface> ebookText = std::make_unique<EpubTextFinder>(path);
-
-       return ebookText->find(keyword);
+       return unique_ptr<IFindable>{ make_unique<EBook::Epub>(path, make_unique<RegMatch>(keyword)) }->find();
 }
 
-extern "C" void media_svc_pdf_insert_to_db(sqlite3 *handle, const char *path)
+extern "C" void media_svc_pdf_insert_to_db(void *handle, const char *path)
 {
-       std::unique_ptr<TextFinderInterface> ebookText = std::make_unique<PdfTextFinder>(handle, path);
-
-       ebookText->insert();
+       unique_ptr<IInsertable>{ make_unique<EBook::Pdf>(path, make_unique<DbInserter>(path, handle)) }->insert();
 }
 
-extern "C" void media_svc_epub_insert_to_db(sqlite3 *handle, const char *path)
+extern "C" void media_svc_epub_insert_to_db(void *handle, const char *path)
 {
-       std::unique_ptr<TextFinderInterface> ebookText = std::make_unique<EpubTextFinder>(handle, path);
-
-       ebookText->insert();
+       unique_ptr<IInsertable>{ make_unique<EBook::Epub>(path, make_unique<DbInserter>(path, handle)) }->insert();
 }