src/common/media-svc-album.c
src/common/media-svc-media-folder.c
src/common/media-svc-db-utils.c
- src/common/media-svc-util-pdf.cpp
- src/common/media-svc-util-epub.c
+ src/common/media-svc-util-ebook.cpp
src/common/media-svc-util.c
src/common/media-svc-noti.c
src/common/media-svc-storage.c
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+#include <media-svc-util-ebook.h>
+#include <media-svc-debug.h>
+
+#include <cstring>
+#include <exception>
+#include <memory>
+#include <regex>
+#include <stack>
+#include <string>
+#include <vector>
+
+#include <podofo/podofo.h>
+#include <glib.h>
+#include <zip.h>
+#include <libxml/xmlmemory.h>
+#include <libxml/parser.h>
+#include <libxml/HTMLparser.h>
+
+/*
+ * Common interface of the keyword searchers defined in this file.
+ * Each concrete finder overrides find().
+ */
+class TextFinderInterface
+{
+public:
+	/* Reports whether @keyword is found; implemented by each derived finder. */
+	virtual bool find(const char *keyword) = 0;
+
+	/* Virtual so a derived finder can be destroyed through a base pointer. */
+	virtual ~TextFinderInterface() = default;
+};
+
+/*---------------- PDF -----------------------*/
+
+/*
+ * Keyword searcher for PDF documents, backed by a PoDoFo::PdfMemDocument.
+ */
+class PdfTextFinder : public TextFinderInterface
+{
+public:
+	explicit PdfTextFinder(const char *path);
+	bool find(const char *keyword) override;
+
+private:
+	/* Becomes true only after the constructor loaded the document. */
+	bool loaded {};
+	PoDoFo::PdfMemDocument pdf {};
+
+	/* Extracts the text of one page as UTF-8. */
+	std::string parseTextFromPage(unsigned int index);
+	/* Case-insensitive regular-expression search of keyword in text. */
+	bool match(std::string& text, const char *keyword);
+};
+
+/*
+ * Loads the PDF document at @path.
+ *
+ * A null path, or a PoDoFo::PdfError raised while loading, is logged and
+ * leaves 'loaded' false.
+ */
+PdfTextFinder::PdfTextFinder(const char *path)
+{
+	if (path == nullptr) {
+		media_svc_error("invalid path");
+		return;
+	}
+
+	media_svc_debug("%s", path);
+
+	try {
+		pdf.Load(path);
+	} catch (const PoDoFo::PdfError& err) {
+		media_svc_error("Initialization failed : %s", err.what());
+		return;
+	}
+
+	loaded = true;
+}
+
+/*
+ * Searches the loaded document page by page for @keyword.
+ *
+ * Returns true as soon as the text extracted from one page matches. Returns
+ * false when the document was not loaded, @keyword is null, no page matches,
+ * or PoDoFo raises a PdfError while a page is being read.
+ */
+bool PdfTextFinder::find(const char *keyword)
+{
+	if (!loaded)
+		return false;
+
+	media_svc_retvm_if(!keyword, false, "Invalid keyword");
+
+	/*
+	 * PoDoFo reports malformed content by throwing PdfError, and that can
+	 * happen while pages are read, not only while the file is loaded. This
+	 * object is driven from an extern "C" entry point, so the error is
+	 * handled here instead of being allowed to propagate.
+	 */
+	try {
+		const int pageCount = pdf.GetPageCount();
+
+		for (int n = 0; n < pageCount; ++n) {
+			auto text = parseTextFromPage(n);
+			if (match(text, keyword))
+				return true;
+		}
+	} catch (const PoDoFo::PdfError& e) {
+		media_svc_error("Parsing failed : %s", e.what());
+	}
+
+	return false;
+}
+
+/*
+ * Collects the text shown on page @index as a UTF-8 string.
+ *
+ * The page's content stream is tokenized. Operands seen between the BT and ET
+ * keywords are kept on a stack and consumed by the Tf keyword (font selection)
+ * and by the Tj, ', " and TJ keywords (text showing). Strings are converted
+ * with the encoding of the most recently selected font and are skipped when
+ * no font with an encoding is selected.
+ *
+ * Returns an empty string when the page cannot be obtained.
+ */
+std::string PdfTextFinder::parseTextFromPage(unsigned int index)
+{
+	std::string pageText;
+
+	PoDoFo::PdfPage *page = pdf.GetPage(index);
+	media_svc_retv_if(!page, pageText);
+
+	PoDoFo::PdfContentsTokenizer tokenizer(page);
+	PoDoFo::EPdfContentsType kind;
+	PoDoFo::PdfVariant operand;
+	const char *op = nullptr;
+
+	std::stack<PoDoFo::PdfVariant> operands;
+	PoDoFo::PdfFont *font = nullptr;
+	bool inTextObject = false;
+
+	/* Converts one string operand with the current font and appends it. */
+	auto appendDecoded = [&](const PoDoFo::PdfString& raw) {
+		pageText += font->GetEncoding()->ConvertToUnicode(raw, font).GetStringUtf8();
+	};
+
+	while (tokenizer.ReadNext(kind, op, operand)) {
+		if (kind != PoDoFo::ePdfContentsType_Keyword) {
+			/* Operands are only collected while inside a BT..ET section. */
+			if (inTextObject)
+				operands.push(operand);
+
+			continue;
+		}
+
+		if (!inTextObject) {
+			/* Outside a text section only BT is of interest. */
+			if (strcmp(op, "BT") == 0)
+				inTextObject = true;
+
+			continue;
+		}
+
+		if (strcmp(op, "ET") == 0) {
+			inTextObject = false;
+			continue;
+		}
+
+		if (strcmp(op, "Tf") == 0) {
+			if (operands.size() < 2) {
+				font = nullptr;
+				continue;
+			}
+
+			/* Drop the top operand; the one below it is read as the font resource name. */
+			operands.pop();
+			font = pdf.GetFont(page->GetFromResources(PoDoFo::PdfName("Font"), operands.top().GetName()));
+		} else if (strcmp(op, "Tj") == 0 || strcmp(op, "'") == 0 || strcmp(op, "\"") == 0) {
+			if (operands.empty())
+				continue;
+
+			if (!font || !font->GetEncoding())
+				continue;
+
+			appendDecoded(operands.top().GetString());
+			operands.pop();
+		} else if (strcmp(op, "TJ") == 0) {
+			if (operands.empty())
+				continue;
+
+			PoDoFo::PdfArray items = operands.top().GetArray();
+			operands.pop();
+
+			/* Only the string elements of the array are decoded; others are skipped. */
+			for (int i = 0; i < static_cast<int>(items.GetSize()); i++) {
+				if (!items[i].IsString() && !items[i].IsHexString())
+					continue;
+
+				if (!font || !font->GetEncoding())
+					continue;
+
+				appendDecoded(items[i].GetString());
+			}
+		}
+	}
+
+	return pageText;
+}
+
+/* ToDo : match can be passed to EbookText */
+/*
+ * Case-insensitive regular-expression search of @keyword in @text.
+ *
+ * Returns false when @keyword is null, @text is empty, nothing matches, or
+ * @keyword cannot be used as a std::regex pattern.
+ */
+bool PdfTextFinder::match(std::string& text, const char *keyword)
+{
+	media_svc_retv_if(!keyword, false);
+
+	if (text.empty())
+		return false;
+
+	try {
+		/* Construction throws std::regex_error for a malformed pattern such as "(". */
+		const std::regex re(keyword, std::regex::icase);
+
+		if (std::regex_search(text, re)) {
+			media_svc_debug("Found [%s]", keyword);
+			return true;
+		}
+	} catch (const std::regex_error& e) {
+		/* Treated as "no match" so the exception never reaches the C callers. */
+		media_svc_error("Invalid keyword pattern [%s] : %s", keyword, e.what());
+	}
+
+	return false;
+}
+
+/*---------------- EPUB -----------------------*/
+
+/*
+ * Keyword searcher for EPUB files. The file is opened as a zip archive and
+ * the handle is kept for the lifetime of the object.
+ */
+class EpubTextFinder : public TextFinderInterface
+{
+public:
+	explicit EpubTextFinder(const char *path);
+	~EpubTextFinder() override;
+
+	/* The destructor closes the zip handle, so a copy would close it twice. */
+	EpubTextFinder(const EpubTextFinder&) = delete;
+	EpubTextFinder& operator=(const EpubTextFinder&) = delete;
+
+	bool find(const char *keyword) override;
+
+private:
+	/* Case-insensitive regular-expression search of keyword in text. */
+	bool match(const char *text, const char *keyword);
+	/* Searches the text nodes of node, its siblings and their descendants. */
+	bool htmlNodeFindRecursive(xmlNodePtr node, const char *keyword);
+	/* Parses an in-memory HTML buffer and searches its text nodes. */
+	bool htmlFind(const char *html_buf, int buf_size, const char *keyword);
+
+	/* Archive handle; stays null when the archive was not opened. */
+	zip_t *z{};
+};
+
+/*
+ * Opens the EPUB file at @path read-only as a zip archive.
+ *
+ * A null path or a failed zip_open() is logged and leaves 'z' null.
+ */
+EpubTextFinder::EpubTextFinder(const char *path)
+{
+	if (!path) {
+		media_svc_error("invalid path");
+		return;
+	}
+
+	media_svc_debug("%s", path);
+
+	int err = 0;
+	z = zip_open(path, ZIP_RDONLY, &err);
+	/* zip_open() signals failure through a NULL handle; err holds the reason. */
+	if (!z)
+		media_svc_error("zip_open failed [%d]", err);
+}
+
+/* Closes the zip archive if one is held. */
+EpubTextFinder::~EpubTextFinder()
+{
+	if (z) {
+		zip_close(z);
+		z = nullptr;
+	}
+}
+
+/*
+ * Searches every archive entry whose name ends with "html" for @keyword.
+ *
+ * Each such entry is read fully into memory and parsed as HTML. Entries that
+ * cannot be inspected, opened or read completely are skipped, as are entries
+ * with no reported size, an empty body, or a size that does not fit the int
+ * length taken by htmlFind().
+ *
+ * Returns true on the first entry that contains a match, false when @keyword
+ * is null, the archive was not opened, or no entry matches.
+ */
+bool EpubTextFinder::find(const char *keyword)
+{
+	zip_stat_t sb = {0, };
+
+	media_svc_retvm_if(!keyword, false, "Invalid keyword");
+	media_svc_retvm_if(!z, false, "Archive is not opened");
+
+	const zip_int64_t entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED);
+	for (zip_int64_t i = 0; i < entry_len; i++) {
+		const zip_uint64_t index = static_cast<zip_uint64_t>(i);
+
+		/* zip_get_name() returns NULL on error; never hand that to GLib. */
+		const char *name = zip_get_name(z, index, ZIP_FL_ENC_GUESS);
+		if (!name || !g_str_has_suffix(name, "html"))
+			continue;
+
+		if (zip_stat_index(z, index, 0, &sb) != 0)
+			continue;
+
+		/* sb.size is meaningful only when the ZIP_STAT_SIZE bit is set. */
+		if (!(sb.valid & ZIP_STAT_SIZE))
+			continue;
+
+		if (sb.size == 0 || sb.size > static_cast<zip_uint64_t>(G_MAXINT))
+			continue;
+
+		zip_file_t *file = zip_fopen_index(z, index, 0);
+		if (!file)
+			continue;
+
+		const int size = static_cast<int>(sb.size);
+		std::vector<char> file_buf(static_cast<size_t>(size));
+
+		const zip_int64_t readn = zip_fread(file, file_buf.data(), sb.size);
+		zip_fclose(file);
+
+		/* Only a completely read entry is parsed. */
+		if ((readn == static_cast<zip_int64_t>(size)) &&
+			htmlFind(file_buf.data(), size, keyword))
+			return true;
+	}
+
+	return false;
+}
+
+
+/*
+ * Case-insensitive regular-expression search of @keyword in @text.
+ *
+ * Returns false when either argument is null, nothing matches, or @keyword
+ * cannot be used as a std::regex pattern.
+ */
+bool EpubTextFinder::match(const char *text, const char *keyword)
+{
+	media_svc_retv_if(!keyword, false);
+	if (!text)
+		return false;
+
+	try {
+		/* Construction throws std::regex_error for a malformed pattern such as "(". */
+		const std::regex re(keyword, std::regex::icase);
+
+		if (std::regex_search(text, re)) {
+			media_svc_debug("Found [%s]", keyword);
+			return true;
+		}
+	} catch (const std::regex_error& e) {
+		/* Treated as "no match" so the exception never reaches the C callers. */
+		media_svc_error("Invalid keyword pattern [%s] : %s", keyword, e.what());
+	}
+
+	return false;
+}
+
+/*
+ * Walks @node and its following siblings, descending into each one's
+ * children, and reports whether any text node's content matches @keyword.
+ */
+bool EpubTextFinder::htmlNodeFindRecursive(xmlNodePtr node, const char *keyword)
+{
+	xmlNodePtr sibling = node;
+
+	while (sibling) {
+		/* Only text nodes are matched; every node is still descended into. */
+		if (sibling->type == XML_TEXT_NODE) {
+			const char *content = reinterpret_cast<const char *>(sibling->content);
+			if (match(content, keyword))
+				return true;
+		}
+
+		if (htmlNodeFindRecursive(sibling->children, keyword))
+			return true;
+
+		sibling = sibling->next;
+	}
+
+	return false;
+}
+
+/*
+ * Parses @buf_size bytes at @html_buf as HTML and searches the document's
+ * text nodes for @keyword. Returns false when the buffer cannot be parsed.
+ */
+bool EpubTextFinder::htmlFind(const char *html_buf, int buf_size, const char *keyword)
+{
+	const int options = HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET;
+
+	htmlDocPtr doc = htmlReadMemory(html_buf, buf_size, "/", NULL, options);
+	media_svc_retvm_if(!doc, false, "htmlReadMemory failed");
+
+	xmlNodePtr root = xmlDocGetRootElement(doc);
+	const bool found = htmlNodeFindRecursive(root, keyword);
+
+	/* The document is released before the result is returned. */
+	xmlFreeDoc(doc);
+
+	return found;
+}
+
+/*
+ * C entry point: reports whether the PDF file at @path contains @keyword.
+ *
+ * Returns false when an argument is null, the keyword is not found, or the
+ * search fails with a C++ exception.
+ */
+bool _media_svc_pdf_is_keyword_included(const char *path, const char *keyword)
+{
+	/* Reject bad arguments before the document is opened. */
+	media_svc_retvm_if(!path, false, "Invalid path");
+	media_svc_retvm_if(!keyword, false, "Invalid keyword");
+
+	try {
+		/* ToDo: factory pattern */
+		std::unique_ptr<TextFinderInterface> ebookText = std::make_unique<PdfTextFinder>(path);
+
+		return ebookText->find(keyword);
+	} catch (const std::exception& e) {
+		/* Declared extern "C": a C++ exception must not reach the caller. */
+		media_svc_error("Keyword search failed : %s", e.what());
+	}
+
+	return false;
+}
+
+/*
+ * C entry point: reports whether the EPUB file at @path contains @keyword.
+ *
+ * Returns false when an argument is null, the keyword is not found, or the
+ * search fails with a C++ exception.
+ */
+bool _media_svc_epub_is_keyword_included(const char *path, const char *keyword)
+{
+	/* Reject bad arguments before the archive is opened. */
+	media_svc_retvm_if(!path, false, "Invalid path");
+	media_svc_retvm_if(!keyword, false, "Invalid keyword");
+
+	try {
+		/* ToDo: factory pattern */
+		std::unique_ptr<TextFinderInterface> ebookText = std::make_unique<EpubTextFinder>(path);
+
+		return ebookText->find(keyword);
+	} catch (const std::exception& e) {
+		/* Declared extern "C": a C++ exception must not reach the caller. */
+		media_svc_error("Keyword search failed : %s", e.what());
+	}
+
+	return false;
+}
+++ /dev/null
-/*
- * libmedia-service
- *
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-
-#include <media-svc-util-epub.h>
-#include <media-svc-debug.h>
-
-#include <zip.h>
-#include <libxml/xmlmemory.h>
-#include <libxml/parser.h>
-#include <libxml/HTMLparser.h>
-#include <glib.h>
-
-static bool __media_svc_epub_find_keyword(const char *text, const char *keyword)
-{
- media_svc_retv_if(!text, false);
- media_svc_retv_if(!keyword, false);
-
- if (g_regex_match_simple(keyword, text, G_REGEX_CASELESS, (GRegexMatchFlags)0)) {
- media_svc_debug("Found");
- return true;
- }
-
- return false;
-}
-
-static bool __media_svc_epub_find_html_body(xmlNodePtr node, const char *keyword)
-{
- xmlNodePtr cur_node = NULL;
-
- for (cur_node = node; cur_node; cur_node = cur_node->next) {
- if(cur_node->type == XML_TEXT_NODE && __media_svc_epub_find_keyword((const char *)cur_node->content, keyword))
- return true;
-
- if (__media_svc_epub_find_html_body(cur_node->children, keyword))
- return true;
- }
-
- return false;
-}
-
-static bool __media_svc_epub_check_html(const char *html_buf, int buf_size, const char *keyword)
-{
- htmlDocPtr doc = NULL;
- xmlNodePtr node = NULL;
- bool result = false;
-
- doc = htmlReadMemory(html_buf, buf_size, "/", NULL, HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
- media_svc_retvm_if(!doc, false, "htmlReadMemory failed");
-
- node = xmlDocGetRootElement(doc);
- result = __media_svc_epub_find_html_body(node, keyword);
-
- xmlFreeDoc(doc);
-
- return result;
-}
-
-bool _media_svc_epub_is_keyword_included(const char *path, const char *keyword)
-{
- int err = 0;
- zip_t *z = NULL;
- zip_stat_t sb = {0, };
- zip_file_t *file = NULL;
- int entry_len = 0;
- int i = 0;
- char *file_buf = NULL;
-
- media_svc_retvm_if(!path, false, "Invalid path");
- media_svc_retvm_if(!keyword, false, "Invalid keyword");
-
- z = zip_open(path, ZIP_RDONLY, &err);
- media_svc_retvm_if(err != 0, false, "zip_open failed");
-
- entry_len = zip_get_num_entries(z, ZIP_FL_UNCHANGED);
- for (i = 0; i < entry_len; i++) {
- if (!g_str_has_suffix(zip_get_name(z, i, ZIP_FL_ENC_GUESS), "html"))
- continue;
-
- if (zip_stat_index(z, i, 0, &sb) != 0)
- continue;
-
- file = zip_fopen_index(z, i, 0);
- if (!file)
- continue;
-
- file_buf = g_malloc0(sb.size);
-
- if (zip_fread(file, file_buf, sb.size) == sb.size) {
- if (__media_svc_epub_check_html(file_buf, sb.size, keyword)) {
-// media_svc_sec_debug("File name [%s]", sb.name);
- g_free(file_buf);
- zip_close(z);
- return true;
- }
- }
-
- g_free(file_buf);
- file_buf = NULL;
- }
-
- zip_close(z);
-
- return false;
-}
\ No newline at end of file
+++ /dev/null
-/*
- * libmedia-service
- *
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-#include <podofo/podofo.h>
-#include <stack>
-#include <media-svc-util-pdf.h>
-#include <media-svc-debug.h>
-#include <glib.h>
-
-using namespace std;
-using namespace PoDoFo;
-
-static bool __media_svc_pdf_find_keyword(const char *full, const char *keyword)
-{
- media_svc_retv_if(!full, false);
- media_svc_retv_if(!keyword, false);
-
- if (g_regex_match_simple(keyword, full, G_REGEX_CASELESS, (GRegexMatchFlags)0)) {
- media_svc_debug("Found");
- return true;
- }
-
- return false;
- }
-
-static char * __media_svc_pdf_parse_text(PdfMemDocument *pdf, PdfPage *page, const char *keyword)
-{
- EPdfContentsType type;
- PdfVariant var;
- PdfFont *cur_font = NULL;
- bool text_block = false;
- const char *tok;
- stack<PdfVariant> stack;
- PdfString unicode;
- PdfArray array;
-
- GString *full_text = NULL;
- gchar *tmp_text = NULL;
-
- media_svc_retv_if(!pdf, NULL);
- media_svc_retv_if(!page, NULL);
- media_svc_retv_if(!keyword, NULL);
-
- PdfContentsTokenizer tokenizer(page);
-
- full_text = g_string_new(NULL);
-
- while (tokenizer.ReadNext(type, tok, var)) {
- if (type == ePdfContentsType_Keyword) {
- if (!text_block && strcmp(tok, "BT") == 0) {
- text_block = true;
- continue;
- } else if (text_block && strcmp(tok, "ET") == 0) {
- text_block = false;
- }
-
- if (!text_block)
- continue;
-
- if (strcmp(tok, "Tf") == 0) {
- if (stack.size() < 2) {
- cur_font = NULL;
- continue;
- }
-
- stack.pop();
- cur_font = pdf->GetFont(page->GetFromResources(PdfName("Font"), stack.top().GetName()));
- } else if (strcmp(tok, "Tj") == 0 || strcmp(tok, "'") == 0 || strcmp(tok, "\"") == 0) {
- if (stack.empty())
- continue;
-
- if (!cur_font || !cur_font->GetEncoding())
- continue;
-
- unicode = cur_font->GetEncoding()->ConvertToUnicode(stack.top().GetString(), cur_font);
- full_text = g_string_append(full_text, unicode.GetStringUtf8().c_str());
-
- stack.pop();
- } else if (strcmp(tok, "TJ") == 0) {
- if (stack.empty())
- continue;
-
- array = stack.top().GetArray();
- stack.pop();
-
- for (int i = 0; i < static_cast<int>(array.GetSize()); i++) {
- if (array[i].IsString() || array[i].IsHexString()) {
- if (!cur_font || !cur_font->GetEncoding())
- continue;
-
- unicode = cur_font->GetEncoding()->ConvertToUnicode(array[i].GetString(), cur_font);
- full_text = g_string_append(full_text, unicode.GetStringUtf8().c_str());
- }
- }
- }
- } else {
- if (text_block)
- stack.push(var);
- }
- }
-
- while (!stack.empty())
- stack.pop();
-
- tmp_text = g_string_free(full_text, FALSE);
-
- /* GString start with an empty string. */
- if (strlen(tmp_text) == 0) {
- g_free(tmp_text);
- return NULL;
- } else {
- return tmp_text;
- }
-}
-
-bool _media_svc_pdf_is_keyword_included(const char *path, const char *keyword)
-{
- bool res = false;
- gchar *full_text = NULL;
-
- media_svc_retvm_if(!path, false, "Invalid path");
- media_svc_retvm_if(!keyword, false, "Invalid keyword");
-
- try {
- PdfMemDocument pdf(path);
-
- // PDF format starts from 1..
- // GetPageCount() is a value, not a calculation.. So, it does not affect the performance of this forloop.
- for (int n = 0; n < pdf.GetPageCount(); ++n) {
- PdfPage *page = pdf.GetPage(n);
-
- full_text = __media_svc_pdf_parse_text(&pdf, page, keyword);
-
- if (full_text) {
- res = __media_svc_pdf_find_keyword(full_text, keyword);
- g_free(full_text);
-
- if (res)
- return res;
- }
- }
- } catch (const PdfError& e) {
- media_svc_error("Initialization failed : %s", e.what());
- }
-
- return false;
-}
#include "media-svc-hash.h"
#include "media-svc-album.h"
#include "media-svc-localize_ch.h"
-#include "media-svc-util-pdf.h"
-#include "media-svc-util-epub.h"
+#include "media-svc-util-ebook.h"
/*For ebook metadata */
#include <zip.h>
#include <libxml/xmlmemory.h>
--- /dev/null
+/*
+ * libmedia-service
+ *
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+
+
+#ifndef _MEDIA_SVC_UTIL_EBOOK_H_
+#define _MEDIA_SVC_UTIL_EBOOK_H_
+
+/* C translation units that include this header need 'bool' defined. */
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Reports whether the PDF file at path contains keyword. */
+bool _media_svc_pdf_is_keyword_included(const char *path, const char *keyword);
+/* Reports whether the EPUB file at path contains keyword. */
+bool _media_svc_epub_is_keyword_included(const char *path, const char *keyword);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*_MEDIA_SVC_UTIL_EBOOK_H_*/
+++ /dev/null
-/*
- * libmedia-service
- *
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-
-
-
-#ifndef _MEDIA_SVC_UTIL_EPUB_H_
-#define _MEDIA_SVC_UTIL_EPUB_H_
-
-#include <stdbool.h>
-
-bool _media_svc_epub_is_keyword_included(const char *path, const char *keyword);
-
-#endif /*_MEDIA_SVC_UTIL_EPUB_H_*/
+++ /dev/null
-/*
- * libmedia-service
- *
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-
-
-
-#ifndef _MEDIA_SVC_UTIL_PDF_H_
-#define _MEDIA_SVC_UTIL_PDF_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-bool _media_svc_pdf_is_keyword_included(const char *path, const char *keyword);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /*_MEDIA_SVC_UTIL_PDF_H_*/