SET(EBOOKPLUGIN_SRCS
plugin/media-ebook-plugin.cpp
+ plugin/media-ebook-plugin-regmatch.cpp
+ plugin/media-ebook-plugin-dbinserter.cpp
+ plugin/media-ebook-plugin-pdf.cpp
+ plugin/media-ebook-plugin-epub.cpp
)
SET(CONTENTPLUGIN_SRCS
plugin/media-content-plugin.c
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <dlog.h>
+#include <regex>
+#include <sqlite3.h>
+#include "media-ebook-plugin-dbinserter.h"
+
+#ifdef LOG_TAG
+#undef LOG_TAG
+#endif
+
+#define LOG_TAG "MEDIA_SERVICE"
+
+#define INSERT_QUERY "INSERT INTO words(file_id, word) SELECT id, ? FROM files WHERE path=? ON CONFLICT (file_id, word) DO UPDATE SET frequency=frequency+1;"
+#define TOKEN_KEY "\\s+"
+#define SPECIAL_CHAR "[\\{\\}\\[\\]\\/?.,;:|\\)*~`!^\\-_+<>@\\#$%&\\\\=\\(\\\'\\\"]"
+
+/**
+ * Splits @p text into words and records every word for filePath through
+ * INSERT_QUERY.
+ *
+ * All characters matched by SPECIAL_CHAR are removed first, then the text is
+ * split on TOKEN_KEY. INSERT_QUERY adds a (file, word) row or, on conflict,
+ * increments its frequency. The rows are written inside one transaction when
+ * "BEGIN;" succeeds and without one otherwise.
+ *
+ * @param text Text to tokenise.
+ * @return false when the handle, the file path or the text is missing, when
+ *         the statement cannot be prepared, or when at least one word could
+ *         not be written; true otherwise.
+ */
+bool DbInserter::run(const std::string& text)
+{
+    if (!dbHandle || filePath.empty() || text.empty())
+        return false;
+
+    auto sqlite_handle = static_cast<sqlite3 *>(const_cast<void *>(dbHandle));
+
+    /* Prepare before opening the transaction so that a failure here cannot
+     * leave a transaction open. */
+    sqlite3_stmt *stmt = NULL;
+    if (sqlite3_prepare_v2(sqlite_handle, INSERT_QUERY, -1, &stmt, NULL) != SQLITE_OK) {
+        LOGE("sqlite3_prepare_v2 failed : %s", sqlite3_errmsg(sqlite_handle));
+        return false;
+    }
+
+    /* The patterns never change, so they are compiled only once. */
+    static const std::regex sp(SPECIAL_CHAR);
+    static const std::regex re(TOKEN_KEY);
+
+    const std::string _text = std::regex_replace(text, sp, "");
+    auto words_begin = std::sregex_token_iterator(_text.begin(), _text.end(), re, -1);
+    auto words_end = std::sregex_token_iterator();
+
+    bool isTransaction = (sqlite3_exec(sqlite_handle, "BEGIN;", NULL, NULL, NULL) == SQLITE_OK);
+
+    /* sqlite3_reset() keeps the bindings, so the path is bound a single time. */
+    sqlite3_bind_text(stmt, 2, filePath.c_str(), -1, SQLITE_TRANSIENT);
+
+    int failed = 0;
+    for (auto i = words_begin; i != words_end; ++i) {
+        /* Splitting yields an empty token when the text starts with a
+         * separator or is empty after stripping; that is not a word. */
+        if (i->length() == 0)
+            continue;
+
+        const std::string word = i->str();
+        sqlite3_bind_text(stmt, 1, word.c_str(), -1, SQLITE_TRANSIENT);
+        if (sqlite3_step(stmt) != SQLITE_DONE)
+            ++failed;
+        sqlite3_reset(stmt);
+    }
+
+    if (failed > 0)
+        LOGE("%d word(s) not inserted : %s", failed, sqlite3_errmsg(sqlite_handle));
+
+    sqlite3_finalize(stmt);
+
+    /* Best effort: the words that were written are still committed. */
+    if (isTransaction)
+        sqlite3_exec(sqlite_handle, "COMMIT;", NULL, NULL, NULL);
+
+    return failed == 0;
+}
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef __MEDIA_EBOOK_PLUGIN_DBINSERTER_H__
+#define __MEDIA_EBOOK_PLUGIN_DBINSERTER_H__
+
+#include <string>
+#include "media-ebook-plugin-interface.h"
+
+/**
+ * IRunnable that stores the words of the texts it receives in a database.
+ *
+ * The database handle is held as an untyped pointer and is not owned by this
+ * class: nothing here closes or frees it.
+ */
+class DbInserter : public IRunnable
+{
+public:
+    /**
+     * @param path   Path of the file the words belong to.
+     * @param handle Database handle; run() returns false when it is null.
+     */
+    DbInserter(std::string path, const void* handle)
+        : filePath(path)
+        , dbHandle(handle)
+    {
+    }
+
+    ~DbInserter() override = default;
+
+    /** Tokenises @p text and writes its words; see the definition. */
+    bool run(const std::string& text) override;
+
+private:
+    std::string filePath;            /* file the words are recorded for */
+    const void *dbHandle = nullptr;  /* borrowed database handle */
+};
+
+#endif
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "media-ebook-plugin-epub.h"
+
+#include <dlog.h>
+#include <glib.h>
+#include <vector>
+
+#ifdef LOG_TAG
+#undef LOG_TAG
+#endif
+
+#define LOG_TAG "MEDIA_SERVICE"
+
+using namespace EBook;
+
+/**
+ * Opens the EPUB archive at @p path read-only and takes ownership of
+ * @p runner.
+ *
+ * On an empty path or when the archive cannot be opened the error is logged
+ * and the archive handle stays null.
+ *
+ * @param path   Path of the EPUB file.
+ * @param runner Object that receives every text node found in the archive.
+ */
+Epub::Epub(std::string path, std::unique_ptr<IRunnable> runner)
+    : runner(std::move(runner))
+{
+    if (path.empty()) {
+        LOGE("invalid path");
+        return;
+    }
+
+    LOGD("%s", path.c_str());
+
+    int err = 0;
+    z = zip_open(path.c_str(), ZIP_RDONLY, &err);
+    /* The returned handle is what signals failure; err only says why. */
+    if (!z)
+        LOGE("zip_open failed : %d", err);
+}
+
+/** Closes the archive if one was opened. */
+Epub::~Epub()
+{
+    if (z) {
+        zip_close(z);
+        z = nullptr;
+    }
+}
+
+/**
+ * Runs the runner over the text of every archive entry whose name ends in
+ * "html" and stops at the first entry for which it reports a match.
+ *
+ * Entries that cannot be inspected, opened or read completely are skipped.
+ *
+ * @return true as soon as one entry matches, false otherwise (including when
+ *         no archive is open or no runner was supplied).
+ */
+bool Epub::find()
+{
+    if (!z || !runner)
+        return false;
+
+    const zip_int64_t entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED);
+    for (zip_int64_t i = 0; i < entry_len; i++) {
+        const zip_uint64_t idx = static_cast<zip_uint64_t>(i);
+
+        const char *name = zip_get_name(z, idx, ZIP_FL_ENC_GUESS);
+        if (!name || !g_str_has_suffix(name, "html"))
+            continue;
+
+        zip_stat_t sb {};
+        if (zip_stat_index(z, idx, 0, &sb) != 0 || !(sb.valid & ZIP_STAT_SIZE))
+            continue;
+
+        /* htmlFind() takes the length as an int: skip empty entries and
+         * entries whose size does not survive the conversion. */
+        const int buf_size = static_cast<int>(sb.size);
+        if (buf_size <= 0 || static_cast<zip_uint64_t>(buf_size) != sb.size)
+            continue;
+
+        zip_file_t *file = zip_fopen_index(z, idx, 0);
+        if (!file)
+            continue;
+
+        std::vector<char> file_buf(sb.size);
+
+        zip_int64_t readn = zip_fread(file, file_buf.data(), sb.size);
+        zip_fclose(file);
+
+        /* Only completely read entries are parsed. */
+        if ((readn == static_cast<zip_int64_t>(sb.size)) &&
+            htmlFind(file_buf.data(), buf_size))
+            return true;
+    }
+
+    return false;
+}
+
+/**
+ * Parses an in-memory HTML document and searches its text nodes.
+ *
+ * @param html_buf Start of the HTML data.
+ * @param buf_size Number of bytes at @p html_buf.
+ * @return Result of htmlNodeFindRecursive() on the document root, or false
+ *         when the data cannot be parsed.
+ */
+bool Epub::htmlFind(const char* html_buf, int buf_size)
+{
+    const int options = HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET;
+
+    htmlDocPtr parsed = htmlReadMemory(html_buf, buf_size, "/", NULL, options);
+    if (parsed == NULL) {
+        LOGE("htmlReadMemory failed");
+        return false;
+    }
+
+    const bool matched = htmlNodeFindRecursive(xmlDocGetRootElement(parsed));
+    xmlFreeDoc(parsed);
+
+    return matched;
+}
+
+/**
+ * Walks @p node, its following siblings and all their descendants, handing
+ * the content of each text node to the runner.
+ *
+ * @param node First node to visit; may be null.
+ * @return true as soon as the runner returns true for a text node, false when
+ *         the whole subtree was visited without a match.
+ */
+bool Epub::htmlNodeFindRecursive(xmlNodePtr node)
+{
+    for (xmlNodePtr cur = node; cur; cur = cur->next) {
+        /* A std::string must not be built from a null pointer, so text nodes
+         * without content are skipped. */
+        if (cur->type == XML_TEXT_NODE && cur->content &&
+            runner->run(reinterpret_cast<char*>(cur->content)))
+            return true;
+
+        if (htmlNodeFindRecursive(cur->children))
+            return true;
+    }
+
+    return false;
+}
+
+/**
+ * Hands the text of every archive entry whose name ends in "html" to the
+ * runner.
+ *
+ * Entries that cannot be inspected, opened or read completely are skipped.
+ * Nothing happens when no archive is open or no runner was supplied.
+ */
+void Epub::insert()
+{
+    if (!z || !runner)
+        return;
+
+    const zip_int64_t entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED);
+    for (zip_int64_t i = 0; i < entry_len; i++) {
+        const zip_uint64_t idx = static_cast<zip_uint64_t>(i);
+
+        const char *name = zip_get_name(z, idx, ZIP_FL_ENC_GUESS);
+        if (!name || !g_str_has_suffix(name, "html"))
+            continue;
+
+        zip_stat_t sb {};
+        if (zip_stat_index(z, idx, 0, &sb) != 0 || !(sb.valid & ZIP_STAT_SIZE))
+            continue;
+
+        /* htmlInsert() takes the length as an int: skip empty entries and
+         * entries whose size does not survive the conversion. */
+        const int buf_size = static_cast<int>(sb.size);
+        if (buf_size <= 0 || static_cast<zip_uint64_t>(buf_size) != sb.size)
+            continue;
+
+        zip_file_t *file = zip_fopen_index(z, idx, 0);
+        if (!file)
+            continue;
+
+        std::vector<char> file_buf(sb.size);
+
+        zip_int64_t readn = zip_fread(file, file_buf.data(), sb.size);
+        zip_fclose(file);
+
+        /* Only completely read entries are parsed. */
+        if (readn == static_cast<zip_int64_t>(sb.size))
+            htmlInsert(file_buf.data(), buf_size);
+    }
+}
+
+/**
+ * Parses an in-memory HTML document and feeds its text nodes to the runner.
+ *
+ * @param html_buf Start of the HTML data.
+ * @param buf_size Number of bytes at @p html_buf.
+ */
+void Epub::htmlInsert(const char* html_buf, int buf_size)
+{
+    const int options = HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET;
+
+    htmlDocPtr parsed = htmlReadMemory(html_buf, buf_size, "/", NULL, options);
+    if (parsed == NULL) {
+        LOGE("htmlReadMemory failed");
+        return;
+    }
+
+    htmlNodeInsertRecursive(xmlDocGetRootElement(parsed));
+    xmlFreeDoc(parsed);
+}
+
+/**
+ * Walks @p node, its following siblings and all their descendants, handing
+ * the content of each text node to the runner. The runner's result is
+ * ignored.
+ *
+ * @param node First node to visit; may be null.
+ */
+void Epub::htmlNodeInsertRecursive(xmlNodePtr node)
+{
+    for (xmlNodePtr cur = node; cur; cur = cur->next) {
+        /* A std::string must not be built from a null pointer, so text nodes
+         * without content are skipped. */
+        if (cur->type == XML_TEXT_NODE && cur->content)
+            runner->run(reinterpret_cast<char*>(cur->content));
+
+        htmlNodeInsertRecursive(cur->children);
+    }
+}
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef __MEDIA_EBOOK_PLUGIN_EPUB_H__
+#define __MEDIA_EBOOK_PLUGIN_EPUB_H__
+
+#include "media-ebook-plugin-interface.h"
+
+#include <string>
+#include <memory>
+
+#include <zip.h>
+#include <libxml/xmlmemory.h>
+#include <libxml/parser.h>
+#include <libxml/HTMLparser.h>
+
+namespace EBook {
+
+/**
+ * EPUB reader that passes the text of the archive's HTML entries to an
+ * IRunnable.
+ *
+ * The archive handle is closed by the destructor. The class holds a
+ * std::unique_ptr member, so it cannot be copied.
+ */
+class Epub : public IFindable, public IInsertable
+{
+public:
+    /**
+     * @param path   Path of the EPUB file.
+     * @param runner Object that receives each text node.
+     */
+    Epub(std::string path, std::unique_ptr<IRunnable> runner);
+    ~Epub() override;
+
+    /** @return true when the runner reports a match for some text node. */
+    bool find() override;
+
+    /** Feeds every text node to the runner. */
+    void insert() override;
+
+private:
+    /* Parse one HTML buffer and walk its node tree. */
+    bool htmlFind(const char* html_buf, int buf_size);
+    bool htmlNodeFindRecursive(xmlNodePtr node);
+    void htmlInsert(const char* html_buf, int buf_size);
+    void htmlNodeInsertRecursive(xmlNodePtr node);
+
+    zip_t* z = nullptr;                 /* archive handle, null when not open */
+    std::unique_ptr<IRunnable> runner;  /* consumer of the extracted text */
+};
+
+}
+
+#endif
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef __MEDIA_EBOOK_PLUGIN_INTERFACE_H__
+#define __MEDIA_EBOOK_PLUGIN_INTERFACE_H__
+
+#include <string>
+
+/** Interface of objects that can be searched. */
+class IFindable
+{
+public:
+    /** @return true when the search succeeded. */
+    virtual bool find() = 0;
+
+    virtual ~IFindable() = default;
+};
+
+/** Interface of objects whose content can be inserted somewhere. */
+class IInsertable
+{
+public:
+    /** Performs the insertion; no result is reported. */
+    virtual void insert() = 0;
+
+    virtual ~IInsertable() = default;
+};
+
+/** Interface of an operation that is applied to one piece of text. */
+class IRunnable
+{
+public:
+    /**
+     * @param text Text to process.
+     * @return Implementation-defined success flag.
+     */
+    virtual bool run(const std::string& text) = 0;
+
+    virtual ~IRunnable() = default;
+};
+
+#endif
\ No newline at end of file
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "media-ebook-plugin-pdf.h"
+
+#include <dlog.h>
+#include <stack>
+
+#ifdef LOG_TAG
+#undef LOG_TAG
+#endif
+
+#define LOG_TAG "MEDIA_SERVICE"
+
+using namespace EBook;
+
+/**
+ * Loads the PDF document at @p path and takes ownership of @p runner.
+ *
+ * The document is marked as loaded only when PoDoFo loads it without raising
+ * a PdfError; an empty path or a load error is logged.
+ *
+ * @param path   Path of the PDF file.
+ * @param runner Object that receives the text of each page.
+ */
+Pdf::Pdf(std::string path, std::unique_ptr<IRunnable> runner)
+    : runner(std::move(runner))
+{
+    if (!path.empty()) {
+        LOGD("%s", path.c_str());
+
+        try {
+            pdf.Load(path.c_str());
+            loaded = true;
+        } catch (const PoDoFo::PdfError& e) {
+            LOGE("Initialization failed : %s", e.what());
+        }
+    } else {
+        LOGE("invalid path");
+    }
+}
+
+/**
+ * Runs the runner over the text of each page in order.
+ *
+ * @return true at the first page for which the runner returns true; false
+ *         when no page matches or the document was not loaded.
+ */
+bool Pdf::find()
+{
+    if (!loaded)
+        return false;
+
+    int page_no = 0;
+    while (page_no < pdf.GetPageCount()) {
+        if (runner->run(parseTextFromPage(page_no)))
+            return true;
+
+        ++page_no;
+    }
+
+    return false;
+}
+
+/**
+ * Hands the text of each page, in order, to the runner. Does nothing when
+ * the document was not loaded; the runner's result is ignored.
+ */
+void Pdf::insert()
+{
+    if (loaded) {
+        int page_no = 0;
+        while (page_no < pdf.GetPageCount()) {
+            runner->run(parseTextFromPage(page_no));
+            ++page_no;
+        }
+    }
+}
+
+/**
+ * Extracts the text shown by the content stream of one page.
+ *
+ * Inside BT ... ET blocks the operands are collected on a stack; "Tf" selects
+ * the current font, and the strings shown by "Tj", "'", "\"" and "TJ" are
+ * converted to Unicode with that font's encoding and appended as UTF-8.
+ * Strings shown while no font with an encoding is selected are skipped.
+ *
+ * @param index Page index passed to PdfMemDocument::GetPage().
+ * @return The extracted text; empty when the page does not exist. When PoDoFo
+ *         raises a PdfError part-way through, the text collected so far is
+ *         returned.
+ */
+std::string Pdf::parseTextFromPage(unsigned int index)
+{
+    std::string fullText;
+
+    try {
+        PoDoFo::PdfPage* page = pdf.GetPage(index);
+        if (!page)
+            return fullText;
+
+        PoDoFo::EPdfContentsType type;
+        PoDoFo::PdfVariant var;
+        PoDoFo::PdfFont *cur_font = NULL;
+        bool text_block = false;
+        const char* tok = NULL;
+        std::stack<PoDoFo::PdfVariant> stack;
+        PoDoFo::PdfString unicode;
+        PoDoFo::PdfArray array;
+
+        PoDoFo::PdfContentsTokenizer tokenizer(page);
+
+        while (tokenizer.ReadNext(type, tok, var)) {
+            /* Operands are only of interest inside a text block. */
+            if (type != PoDoFo::ePdfContentsType_Keyword) {
+                if (text_block)
+                    stack.push(var);
+
+                continue;
+            }
+
+            if (!text_block && strcmp(tok, "BT") == 0) {
+                text_block = true;
+                continue;
+            } else if (text_block && strcmp(tok, "ET") == 0) {
+                text_block = false;
+            }
+
+            if (!text_block)
+                continue;
+
+            if (strcmp(tok, "Tf") == 0) {
+                /* Until a usable font is found, no text can be decoded. */
+                cur_font = NULL;
+                if (stack.size() < 2)
+                    continue;
+
+                /* Operands are "<name> <size>": drop the size, keep the name. */
+                stack.pop();
+                if (!stack.top().IsName())
+                    continue;
+
+                /* A lookup that finds nothing must not be handed to GetFont(). */
+                PoDoFo::PdfObject *font_obj = page->GetFromResources(PoDoFo::PdfName("Font"), stack.top().GetName());
+                if (font_obj)
+                    cur_font = pdf.GetFont(font_obj);
+            } else if (strcmp(tok, "Tj") == 0 || strcmp(tok, "'") == 0 || strcmp(tok, "\"") == 0) {
+                if (stack.empty())
+                    continue;
+
+                if (!cur_font || !cur_font->GetEncoding())
+                    continue;
+
+                /* Only read the operand as a string when it is one. */
+                if (stack.top().IsString() || stack.top().IsHexString()) {
+                    unicode = cur_font->GetEncoding()->ConvertToUnicode(stack.top().GetString(), cur_font);
+                    fullText += unicode.GetStringUtf8();
+                }
+
+                stack.pop();
+            } else if (strcmp(tok, "TJ") == 0) {
+                if (stack.empty())
+                    continue;
+
+                /* Only read the operand as an array when it is one. */
+                if (!stack.top().IsArray()) {
+                    stack.pop();
+                    continue;
+                }
+
+                array = stack.top().GetArray();
+                stack.pop();
+
+                /* Without a font encoding none of the elements can be decoded. */
+                if (!cur_font || !cur_font->GetEncoding())
+                    continue;
+
+                for (int i = 0; i < static_cast<int>(array.GetSize()); i++) {
+                    if (array[i].IsString() || array[i].IsHexString()) {
+                        unicode = cur_font->GetEncoding()->ConvertToUnicode(array[i].GetString(), cur_font);
+                        fullText += unicode.GetStringUtf8();
+                    }
+                }
+            }
+        }
+    } catch (const PoDoFo::PdfError& e) {
+        /* One unreadable page must not abort the whole document. */
+        LOGE("Failed to parse page %u : %s", index, e.what());
+    }
+
+    return fullText;
+}
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef __MEDIA_EBOOK_PLUGIN_PDF_H__
+#define __MEDIA_EBOOK_PLUGIN_PDF_H__
+
+#include <podofo/podofo.h>
+#include <string>
+#include <memory>
+#include "media-ebook-plugin-interface.h"
+
+namespace EBook {
+
+/**
+ * PDF reader that passes the text of each page to an IRunnable.
+ *
+ * The class holds a std::unique_ptr member, so it cannot be copied.
+ */
+class Pdf : public IFindable, public IInsertable
+{
+public:
+    /**
+     * @param path   Path of the PDF file.
+     * @param runner Object that receives the text of each page.
+     */
+    Pdf(std::string path, std::unique_ptr<IRunnable> runner);
+    ~Pdf() override = default;
+
+    /** @return true when the runner reports a match for some page. */
+    bool find() override;
+
+    /** Feeds the text of every page to the runner. */
+    void insert() override;
+
+private:
+    /** Returns the text extracted from the page at @p index. */
+    std::string parseTextFromPage(unsigned int index);
+
+    bool loaded = false;                /* set once the document was loaded */
+    PoDoFo::PdfMemDocument pdf {};
+    std::unique_ptr<IRunnable> runner;  /* consumer of the extracted text */
+};
+
+}
+
+#endif
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#include "media-ebook-plugin-regmatch.h"
+#include <dlog.h>
+#include <regex>
+
+#ifdef LOG_TAG
+#undef LOG_TAG
+#endif
+
+#define LOG_TAG "MEDIA_SERVICE"
+
+/**
+ * Searches @p text for the keyword, interpreted as a case-insensitive regular
+ * expression.
+ *
+ * @param text Text to search.
+ * @return true when the keyword is found; false when @p text is empty, when
+ *         nothing matches, or when the keyword is not a valid regular
+ *         expression (the error is logged).
+ */
+bool RegMatch::run(const std::string& text)
+{
+    if (text.empty())
+        return false;
+
+    try {
+        const std::regex re(keyword, std::regex::icase);
+
+        if (std::regex_search(text, re)) {
+            LOGD("Found [%s]", keyword.c_str());
+            return true;
+        }
+    } catch (const std::regex_error& e) {
+        /* e.g. a keyword such as "c++" is not a valid pattern */
+        LOGE("Invalid keyword [%s] : %s", keyword.c_str(), e.what());
+    }
+
+    return false;
+}
\ No newline at end of file
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#ifndef __MEDIA_EBOOK_PLUGIN_REGMATCH_H__
+#define __MEDIA_EBOOK_PLUGIN_REGMATCH_H__
+
+#include <string>
+#include "media-ebook-plugin-interface.h"
+
+/**
+ * IRunnable that reports whether a text contains a keyword.
+ */
+class RegMatch : public IRunnable
+{
+public:
+    /**
+     * @param keyword Pattern to look for; it is copied into a std::string,
+     *                so it must not be a null pointer.
+     */
+    explicit RegMatch(const char *keyword)
+        : keyword(keyword)
+    {
+    }
+
+    ~RegMatch() override = default;
+
+    /** @return true when @p text contains the keyword. */
+    bool run(const std::string& text) override;
+
+private:
+    std::string keyword;  /* pattern searched for by run() */
+};
+
+#endif
\ No newline at end of file
* limitations under the License.
*
*/
-#include <podofo/podofo.h>
-#include <stack>
-#include <memory>
-#include <dlog.h>
-#include <glib.h>
-#include <stdbool.h>
-#include <zip.h>
-#include <libxml/xmlmemory.h>
-#include <libxml/parser.h>
-#include <libxml/HTMLparser.h>
+#include "media-ebook-plugin-interface.h"
+#include "media-ebook-plugin-epub.h"
+#include "media-ebook-plugin-pdf.h"
+#include "media-ebook-plugin-dbinserter.h"
+#include "media-ebook-plugin-regmatch.h"
-#include <regex>
-#include <iterator>
-#include <sqlite3.h>
+#include <dlog.h>
+#include <exception>
+#include <memory>
#ifdef LOG_TAG
#undef LOG_TAG
#endif
#define LOG_TAG "MEDIA_SERVICE"
-#define INSERT_QUERY "INSERT INTO words(file_id, word) SELECT id, ? FROM files WHERE path=? ON CONFLICT (file_id, word) DO UPDATE SET frequency=frequency+1;"
-#define TOKEN_KEY "\\s+"
-#define SPECIAL_CHAR "[\\{\\}\\[\\]\\/?.,;:|\\)*~`!^\\-_+<>@\\#$%&\\\\=\\(\\\'\\\"]"
-
-class TextFinderInterface
-{
-public:
- virtual ~TextFinderInterface() = default;
- virtual bool find(const char *keyword) = 0;
- virtual void insert() = 0;
-};
-
-class TextFinder : public TextFinderInterface
-{
-public:
- virtual ~TextFinder() = default;
- bool match(std::string& text, const char *keyword);
- void batchInsert(std::string& text);
-
- sqlite3 *dbHandle {};
- const char *filePath {};
-};
-
-bool TextFinder::match(std::string& text, const char *keyword)
-{
- if (!keyword)
- return false;
-
- if (text.empty())
- return false;
-
- std::regex re(keyword, std::regex::icase);
-
- if (std::regex_search(text, re)) {
- LOGD("Found [%s]", keyword);
- return true;
- }
-
- return false;
-}
-
-void TextFinder::batchInsert(std::string& text)
-{
- if (!dbHandle || !filePath || text.empty())
- return;
-
- sqlite3_stmt *stmt = NULL;
- const std::regex sp(SPECIAL_CHAR);
- std::string temp = std::regex_replace(text, sp, "");
- bool isTransaction = false;
-
- if (sqlite3_exec(dbHandle, "BEGIN;", NULL, NULL, NULL) == SQLITE_OK)
- isTransaction = true;
-
- sqlite3_prepare_v2(dbHandle, INSERT_QUERY, -1, &stmt, NULL);
-
- const std::regex re(TOKEN_KEY);
- std::sregex_token_iterator end;
-
- for (std::sregex_token_iterator i(temp.begin(), temp.end(), re, -1); i != end; ++i) {
- sqlite3_bind_text(stmt, 1, (*i).str().c_str(), -1, SQLITE_TRANSIENT);
- sqlite3_bind_text(stmt, 2, filePath, -1, SQLITE_TRANSIENT);
- sqlite3_step(stmt);
- sqlite3_reset(stmt);
- }
-
- sqlite3_finalize(stmt);
-
- if (isTransaction)
- sqlite3_exec(dbHandle, "COMMIT;", NULL, NULL, NULL);
-}
-
-/*---------------- PDF -----------------------*/
-class PdfTextFinder : public TextFinder
-{
-public:
- explicit PdfTextFinder(const char *path);
- PdfTextFinder(sqlite3 *handle, const char *path);
- bool find(const char *keyword) override;
- void insert() override;
-
-private:
- std::string parseTextFromPage(unsigned int index);
- bool loaded {};
-
- PoDoFo::PdfMemDocument pdf {};
-};
-
-PdfTextFinder::PdfTextFinder(const char *path)
-{
- if (!path) {
- LOGE("invalid path");
- return;
- }
-
- LOGD("%s", path);
-
- try {
- pdf.Load(path);
- loaded = true;
- } catch (const PoDoFo::PdfError& e) {
- LOGE("Initialization failed : %s", e.what());
- }
-}
-
-PdfTextFinder::PdfTextFinder(sqlite3 *handle, const char *path)
-{
- if (!handle) {
- LOGE("invalid handle");
- return;
- }
-
- if (!path) {
- LOGE("invalid path");
- return;
- }
-
- LOGD("%s", path);
-
- try {
- pdf.Load(path);
- loaded = true;
- dbHandle = handle;
- filePath = path;
- } catch (const PoDoFo::PdfError& e) {
- LOGE("Initialization failed : %s", e.what());
- }
-}
-
-bool PdfTextFinder::find(const char *keyword)
-{
- if (!loaded)
- return false;
-
- if (!keyword) {
- LOGE("Invalid keyword");
- return false;
- }
-
- for (int n = 0; n < pdf.GetPageCount(); ++n) {
- auto text = parseTextFromPage(n);
- if (match(text, keyword))
- return true;
- }
-
- return false;
-}
-
-void PdfTextFinder::insert()
-{
- if (!loaded)
- return;
-
- for (int n = 0; n < pdf.GetPageCount(); ++n) {
- auto text = parseTextFromPage(n);
- batchInsert(text);
- }
-}
-
-std::string PdfTextFinder::parseTextFromPage(unsigned int index)
-{
- std::string fullText;
-
- PoDoFo::EPdfContentsType type;
- PoDoFo::PdfVariant var;
- PoDoFo::PdfFont *cur_font = NULL;
- bool text_block = false;
- const char *tok;
- std::stack<PoDoFo::PdfVariant> stack;
- PoDoFo::PdfString unicode;
- PoDoFo::PdfArray array;
-
- PoDoFo::PdfPage* page = pdf.GetPage(index);
- if (!page)
- return fullText;
-
- PoDoFo::PdfContentsTokenizer tokenizer(page);
-
- while (tokenizer.ReadNext(type, tok, var)) {
- if (type != PoDoFo::ePdfContentsType_Keyword) {
- if (text_block)
- stack.push(var);
-
- continue;
- }
-
- if (!text_block && strcmp(tok, "BT") == 0) {
- text_block = true;
- continue;
- } else if (text_block && strcmp(tok, "ET") == 0) {
- text_block = false;
- }
-
- if (!text_block)
- continue;
-
- if (strcmp(tok, "Tf") == 0) {
- if (stack.size() < 2) {
- cur_font = NULL;
- continue;
- }
-
- stack.pop();
- cur_font = pdf.GetFont(page->GetFromResources(PoDoFo::PdfName("Font"), stack.top().GetName()));
- } else if (strcmp(tok, "Tj") == 0 || strcmp(tok, "'") == 0 || strcmp(tok, "\"") == 0) {
- if (stack.empty())
- continue;
-
- if (!cur_font || !cur_font->GetEncoding())
- continue;
-
- unicode = cur_font->GetEncoding()->ConvertToUnicode(stack.top().GetString(), cur_font);
- fullText += unicode.GetStringUtf8();
-
- stack.pop();
- } else if (strcmp(tok, "TJ") == 0) {
- if (stack.empty())
- continue;
-
- array = stack.top().GetArray();
- stack.pop();
-
- for (int i = 0; i < static_cast<int>(array.GetSize()); i++) {
- if (array[i].IsString() || array[i].IsHexString()) {
- if (!cur_font || !cur_font->GetEncoding())
- continue;
-
- unicode = cur_font->GetEncoding()->ConvertToUnicode(array[i].GetString(), cur_font);
- fullText += unicode.GetStringUtf8();
- }
- }
- }
- }
-
- return fullText;
-}
-
-/*---------------- EPUB -----------------------*/
-
-class EpubTextFinder : public TextFinder
-{
-public:
- explicit EpubTextFinder(const char *path);
- EpubTextFinder(sqlite3 *handle, const char *path);
- bool find(const char *keyword) override;
- void insert() override;
-
- ~EpubTextFinder() override;
-
-private:
- bool htmlNodeFindRecursive(xmlNodePtr node, const char *keyword);
- void htmlNodeFindRecursiveForDb(xmlNodePtr node);
- bool htmlFind(const char *html_buf, int buf_size, const char *keyword);
- void htmlFindForDb(const char *html_buf, int buf_size);
-
- zip_t *z {};
-};
-
-EpubTextFinder::EpubTextFinder(const char *path)
-{
- if (!path) {
- LOGE("invalid path");
- return;
- }
-
- LOGD("%s", path);
-
- int err = 0;
- z = zip_open(path, ZIP_RDONLY, &err);
- if (err != 0)
- LOGE("zip_open failed");
-}
-
-EpubTextFinder::EpubTextFinder(sqlite3 *handle, const char *path)
-{
- if (!handle) {
- LOGE("invalid handle");
- return;
- }
-
- if (!path) {
- LOGE("invalid path");
- return;
- }
-
- LOGD("%s", path);
-
- int err = 0;
- z = zip_open(path, ZIP_RDONLY, &err);
- if (err != 0)
- LOGE("zip_open failed");
- dbHandle = handle;
- filePath = path;
-}
-
-EpubTextFinder::~EpubTextFinder()
-{
- if (!z)
- return;
-
- zip_close(z);
- z = nullptr;
-}
-
-bool EpubTextFinder::find(const char *keyword)
-{
- zip_stat_t sb = {0, };
-
- if (!keyword) {
- LOGE("Invalid keyword");
- return false;
- }
-
- int entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED);
- for (int i = 0; i < entry_len; i++) {
- if (!g_str_has_suffix(zip_get_name(z, i, ZIP_FL_ENC_GUESS), "html"))
- continue;
-
- if (zip_stat_index(z, i, 0, &sb) != 0)
- continue;
-
- zip_file_t *file = zip_fopen_index(z, i, 0);
- if (!file)
- continue;
-
- std::vector<char> file_buf(sb.size);
-
- zip_int64_t readn = zip_fread(file, file_buf.data(), sb.size);
- zip_fclose(file);
-
- if ((readn == static_cast<zip_int64_t>(sb.size)) &&
- htmlFind(file_buf.data(), sb.size, keyword))
- return true;
- }
-
- return false;
-}
-
-void EpubTextFinder::insert()
-{
- zip_stat_t sb = {0, };
-
- int entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED);
- for (int i = 0; i < entry_len; i++) {
- if (!g_str_has_suffix(zip_get_name(z, i, ZIP_FL_ENC_GUESS), "html"))
- continue;
-
- if (zip_stat_index(z, i, 0, &sb) != 0)
- continue;
-
- zip_file_t *file = zip_fopen_index(z, i, 0);
- if (!file)
- continue;
-
- std::vector<char> file_buf(sb.size);
-
- zip_int64_t readn = zip_fread(file, file_buf.data(), sb.size);
- zip_fclose(file);
-
- if (readn == static_cast<zip_int64_t>(sb.size))
- htmlFindForDb(file_buf.data(), sb.size);
- }
-}
-
-bool EpubTextFinder::htmlNodeFindRecursive(xmlNodePtr node, const char *keyword)
-{
- for (xmlNodePtr cur = node; cur; cur = cur->next) {
- if (cur->type == XML_TEXT_NODE) {
- std::string text(reinterpret_cast<char*>(cur->content));
- if (match(text, keyword))
- return true;
- }
-
- if (htmlNodeFindRecursive(cur->children, keyword))
- return true;
- }
-
- return false;
-}
-
-void EpubTextFinder::htmlNodeFindRecursiveForDb(xmlNodePtr node)
-{
- for (xmlNodePtr cur = node; cur; cur = cur->next) {
- if (cur->type == XML_TEXT_NODE) {
- std::string text(reinterpret_cast<char*>(cur->content));
- batchInsert(text);
- }
-
- htmlNodeFindRecursiveForDb(cur->children);
- }
-}
-
-void EpubTextFinder::htmlFindForDb(const char *html_buf, int buf_size)
-{
- htmlDocPtr doc = htmlReadMemory(html_buf, buf_size, "/", NULL,
- HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
-
- if (!doc) {
- LOGE("htmlReadMemory failed");
- return;
- }
-
- htmlNodeFindRecursiveForDb(xmlDocGetRootElement(doc));
- xmlFreeDoc(doc);
-}
-
-bool EpubTextFinder::htmlFind(const char *html_buf, int buf_size, const char *keyword)
-{
- htmlDocPtr doc = htmlReadMemory(html_buf, buf_size, "/", NULL,
- HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
-
- if (!doc) {
- LOGE("htmlReadMemory failed");
- return false;
- }
-
- bool result = htmlNodeFindRecursive(xmlDocGetRootElement(doc), keyword);
-
- xmlFreeDoc(doc);
-
- return result;
-}
+using namespace std;
extern "C" bool media_svc_pdf_is_keyword_included(const char *path, const char *keyword)
{
- /* ToDo: factory pattern */
- std::unique_ptr<TextFinderInterface> ebookText = std::make_unique<PdfTextFinder>(path);
-
- return ebookText->find(keyword);
+ return unique_ptr<IFindable>{ make_unique<EBook::Pdf>(path, make_unique<RegMatch>(keyword)) }->find();
}
extern "C" bool media_svc_epub_is_keyword_included(const char *path, const char *keyword)
{
- /* ToDo: factory pattern */
- std::unique_ptr<TextFinderInterface> ebookText = std::make_unique<EpubTextFinder>(path);
-
- return ebookText->find(keyword);
+ return unique_ptr<IFindable>{ make_unique<EBook::Epub>(path, make_unique<RegMatch>(keyword)) }->find();
}
-extern "C" void media_svc_pdf_insert_to_db(sqlite3 *handle, const char *path)
+/**
+ * C entry point: stores the words of the PDF at @p path through the database
+ * handle @p handle.
+ *
+ * Nothing is done when an argument is NULL. Exceptions raised while reading
+ * the document are logged and not propagated.
+ */
+extern "C" void media_svc_pdf_insert_to_db(void *handle, const char *path)
{
- std::unique_ptr<TextFinderInterface> ebookText = std::make_unique<PdfTextFinder>(handle, path);
-
- ebookText->insert();
+    /* The path is converted to std::string, which must not be built from
+     * NULL; without a handle there is nowhere to write to. */
+    if (!handle || !path) {
+        LOGE("invalid parameter");
+        return;
+    }
+
+    /* Exceptions must not propagate into the C caller. */
+    try {
+        std::unique_ptr<IInsertable> inserter = std::make_unique<EBook::Pdf>(path, std::make_unique<DbInserter>(path, handle));
+        inserter->insert();
+    } catch (const std::exception& e) {
+        LOGE("insert failed : %s", e.what());
+    } catch (...) {
+        LOGE("insert failed");
+    }
}
-extern "C" void media_svc_epub_insert_to_db(sqlite3 *handle, const char *path)
+/**
+ * C entry point: stores the words of the EPUB at @p path through the
+ * database handle @p handle.
+ *
+ * Nothing is done when an argument is NULL. Exceptions raised while reading
+ * the document are logged and not propagated.
+ */
+extern "C" void media_svc_epub_insert_to_db(void *handle, const char *path)
{
- std::unique_ptr<TextFinderInterface> ebookText = std::make_unique<EpubTextFinder>(handle, path);
-
- ebookText->insert();
+    /* The path is converted to std::string, which must not be built from
+     * NULL; without a handle there is nowhere to write to. */
+    if (!handle || !path) {
+        LOGE("invalid parameter");
+        return;
+    }
+
+    /* Exceptions must not propagate into the C caller. */
+    try {
+        std::unique_ptr<IInsertable> inserter = std::make_unique<EBook::Epub>(path, std::make_unique<DbInserter>(path, handle));
+        inserter->insert();
+    } catch (const std::exception& e) {
+        LOGE("insert failed : %s", e.what());
+    } catch (...) {
+        LOGE("insert failed");
+    }
}