[ACR-1635] Support ebook format 28/259428/7
authorMinje Ahn <minje.ahn@samsung.com>
Mon, 7 Jun 2021 23:16:13 +0000 (08:16 +0900)
committerMinje Ahn <minje.ahn@samsung.com>
Mon, 14 Jun 2021 07:33:37 +0000 (16:33 +0900)
Change-Id: I4aa398047226f1efd81ee4ee79a10a7fbea19245
Signed-off-by: Minje Ahn <minje.ahn@samsung.com>
CMakeLists.txt
packaging/libmedia-service.spec
src/common/media-svc-util.c
src/common/media-svc.c
src/include/common/media-svc-util.h

index 2b8f499..f715e16 100644 (file)
@@ -52,7 +52,7 @@ EXEC_PROGRAM("${UNAME}" ARGS "-m" OUTPUT_VARIABLE "ARCH")
 
 
 INCLUDE(FindPkgConfig)
-pkg_check_modules(pkgs REQUIRED glib-2.0 dlog sqlite3 icu-i18n libexif mm-fileinfo libmedia-utils aul uuid vconf capi-system-info mmutil-magick media-thumbnail)
+pkg_check_modules(pkgs REQUIRED glib-2.0 dlog sqlite3 icu-i18n libexif mm-fileinfo libmedia-utils aul uuid vconf libxml-2.0 libzip capi-system-info mmutil-magick media-thumbnail libpodofo)
 
 FOREACH(flag ${pkgs_CFLAGS})
        SET(EXTRA_CFLAGS "${EXTRA_CFLAGS} ${flag}")
index 2d535a6..d29d25d 100644 (file)
@@ -17,6 +17,8 @@ BuildRequires:  pkgconfig(aul)
 BuildRequires:  pkgconfig(libexif)
 BuildRequires:  pkgconfig(sqlite3)
 BuildRequires:  pkgconfig(icu-i18n)
+BuildRequires:  pkgconfig(libzip)
+BuildRequires:  pkgconfig(libxml-2.0)
 BuildRequires:  pkgconfig(mm-fileinfo)
 BuildRequires:  pkgconfig(libmedia-utils)
 BuildRequires:  pkgconfig(uuid)
@@ -24,6 +26,7 @@ BuildRequires:  pkgconfig(libtzplatform-config)
 BuildRequires:  pkgconfig(capi-system-info)
 BuildRequires:  pkgconfig(mmutil-magick)
 BuildRequires:  pkgconfig(media-thumbnail)
+BuildRequires:  pkgconfig(libpodofo)
 
 %if 0%{?gtests:1}
 BuildRequires:  pkgconfig(gmock)
index df875da..bf0db2d 100644 (file)
 #include "media-svc-hash.h"
 #include "media-svc-album.h"
 #include "media-svc-localize_ch.h"
+/*For ebook*/
+#include <zip.h>
+#include <libxml/xmlmemory.h>
+#include <libxml/parser.h>
+#include <libxml/HTMLparser.h>
 
 #define MEDIA_SVC_FILE_EXT_LEN_MAX                             6                       /**< Maximum file ext lenth*/
 
@@ -64,6 +69,7 @@
 #define MEDIA_SVC_CATEGORY_PVR 0x00000020      /**< PVR category */
 #define MEDIA_SVC_CATEGORY_UHD 0x00000040      /**< UHD category */
 #define MEDIA_SVC_CATEGORY_SCSA        0x00000080      /**< SCSA category */
+#define MEDIA_SVC_CATEGORY_BOOK        0x00000100      /**< ebook category */
 
 #define CONTENT_TYPE_NUM 5
 #define MUSIC_MIME_NUM 29
@@ -81,6 +87,9 @@
 
 #define MEDIA_SVC_DEFAULT_GPS_VALUE                    -200                    /**< Default GPS Value*/
 
+#define MEDIA_SVC_PDF_TAG_TAIL_LEN 12
+#define MEDIA_SVC_PDF_BUF_SIZE 256
+
 typedef struct {
        char content_type[15];
        int category_by_mime;
@@ -374,6 +383,11 @@ static int __media_svc_get_content_type_from_mime(const char *path, const char *
                                }
                        }
                }
+
+               if (g_str_has_suffix(mimetype, "epub+zip") || g_str_has_suffix(mimetype, "pdf")) {
+                       *category ^= MEDIA_SVC_CATEGORY_ETC;
+                       *category |= MEDIA_SVC_CATEGORY_BOOK;
+               }
        }
 
        /*check music file in sound files. */
@@ -441,6 +455,9 @@ static int __media_svc_get_media_type(const char *path, const char *mime_type, m
        case MEDIA_SVC_CATEGORY_VIDEO:
                *media_type = MEDIA_SVC_MEDIA_TYPE_VIDEO;
                break;
+       case MEDIA_SVC_CATEGORY_BOOK:
+               *media_type = MEDIA_SVC_MEDIA_TYPE_BOOK;
+               break;
        default:
                *media_type = MEDIA_SVC_MEDIA_TYPE_OTHER;
        }
@@ -1202,9 +1219,291 @@ int _media_svc_extract_media_metadata(sqlite3 *handle, bool is_direct, media_svc
        return MS_MEDIA_ERR_NONE;
 }
 
+/*
+ * Read one entry of an opened zip archive into a newly allocated buffer.
+ *
+ * @param z     opened zip archive
+ * @param fname entry name to look up (matched case-insensitively)
+ * @return buffer holding the entry data followed by a terminating NUL byte,
+ *         or NULL on any failure. Release it with g_free().
+ *
+ * The extra NUL byte matters: callers hand the buffer to xmlParseDoc(),
+ * which expects a NUL-terminated string.
+ */
+static gchar * __media_svc_get_zipfile_data(zip_t *z, const char *fname)
+{
+       int err = 0;
+       zip_int64_t index_num = 0;
+       zip_int64_t read_len = 0;
+       zip_file_t *file = NULL;
+       zip_stat_t sb = {0, };
+       gchar *buf = NULL;
+
+       media_svc_retvm_if(!z, NULL, "z is NULL");
+       media_svc_retvm_if(!fname, NULL, "fname is NULL");
+
+       index_num = zip_name_locate(z, fname, ZIP_FL_NOCASE);
+       media_svc_retvm_if(index_num == -1, NULL, "fname is not exists [%s]", fname);
+
+       err = zip_stat_index(z, index_num, ZIP_STAT_SIZE, &sb);
+       media_svc_retvm_if(err == -1, NULL, "zip_stat_index failed");
+       media_svc_retvm_if(!(sb.valid & ZIP_STAT_SIZE), NULL, "size is unknown");
+       /* The size comes from the archive itself; reject values that cannot be allocated. */
+       media_svc_retvm_if(sb.size >= G_MAXSIZE, NULL, "invalid size");
+
+       file = zip_fopen_index(z, index_num, ZIP_FL_UNCHANGED);
+       media_svc_retvm_if(!file, NULL, "zip_fopen_index failed");
+
+       /* +1 keeps the zero-filled buffer NUL-terminated after the read. */
+       buf = g_try_malloc0((gsize)sb.size + 1);
+       if (!buf) {
+               media_svc_error("allocation failed");
+               zip_fclose(file);
+               return NULL;
+       }
+
+       /* zip_fread() returns a 64-bit byte count; do not truncate it into an int. */
+       read_len = zip_fread(file, buf, sb.size);
+       zip_fclose(file);
+
+       if (read_len == -1) {
+               g_free(buf);
+               buf = NULL;
+       }
+
+       return buf;
+}
+
+/*
+ * Return the first non-blank direct child of 'node' whose name ends with
+ * 'key', or NULL when there is no such child (or an argument is NULL).
+ */
+static xmlNodePtr __media_svc_find_node(xmlNodePtr node, const char *key)
+{
+       xmlNodePtr child = NULL;
+
+       media_svc_retvm_if(!node, NULL, "node is NULL");
+       media_svc_retvm_if(!key, NULL, "key is NULL");
+
+       child = node->children;
+       while (child) {
+               /* Suffix match, so a prefixed name such as "ns:key" is accepted too. */
+               if (!xmlIsBlankNode(child) && g_str_has_suffix((gchar *)child->name, key))
+                       break;
+
+               child = child->next;
+       }
+
+       return child;
+}
+
+/*
+ * Return a copy of 'value' with leading and trailing spaces and newlines
+ * removed. Blanks inside the text are kept.
+ *
+ * @param value NUL-terminated input string
+ * @return newly allocated string (release with g_free()); an empty string
+ *         when 'value' is empty or holds only spaces/newlines; NULL when
+ *         'value' is NULL.
+ */
+static char * __media_svc_remove_escape_c(const char *value)
+{
+       int start = -1;
+       int end = 0;
+       int len, i;
+
+       media_svc_retv_if(!value, NULL);
+
+       len = strlen(value);
+
+       for (i = 0; i < len; i++) {
+               if (value[i] != '\n' && value[i] != ' ') {
+                       if (start == -1)
+                               start = i;
+
+                       end = i;
+               }
+       }
+
+       /* No visible character at all: without this guard the copy below
+        * would start one byte before the beginning of 'value'. */
+       if (start == -1)
+               return g_strdup("");
+
+       return g_strndup(value + start, end - start + 1);
+}
+
+/*
+ * Depth-first search below 'node' for the first element whose name ends
+ * with 'key' and return its text content.
+ *
+ * Descendants of a child are searched before the child itself is compared.
+ *
+ * @param node element whose subtree is searched
+ * @param key  name suffix to look for (e.g. "title" also matches "dc:title")
+ * @return trimmed copy of the content, always allocated by GLib regardless of
+ *         the depth at which the match was found (release with g_free());
+ *         NULL when nothing matches or the content cannot be read.
+ */
+static char * __media_svc_find_and_get_value(xmlNodePtr node, const char *key)
+{
+       xmlNodePtr tmp = NULL;
+       xmlChar *content = NULL;
+       char *res = NULL;
+
+       media_svc_retvm_if(!node, NULL, "node is NULL");
+       media_svc_retvm_if(!key, NULL, "key is NULL");
+
+       for (tmp = node->children; tmp; tmp = tmp->next) {
+               if (xmlIsBlankNode(tmp))
+                       continue;
+
+               if (tmp->children) {
+                       /* The nested call already returns a trimmed GLib string;
+                        * hand it up unchanged instead of passing it to xmlFree(). */
+                       res = __media_svc_find_and_get_value(tmp, key);
+                       if (res)
+                               return res;
+               }
+
+               if (g_str_has_suffix((gchar *)tmp->name, key)) {
+                       /* libxml2 owns this allocation; convert it exactly once. */
+                       content = xmlNodeGetContent(tmp);
+                       if (!content)
+                               return NULL;
+
+                       res = __media_svc_remove_escape_c((const char *)content);
+                       xmlFree(content);
+
+                       return res;
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * Look up the path of the OPF package document of an EPUB archive.
+ *
+ * Reads "META-INF/container.xml" from the archive and takes the "full-path"
+ * attribute of the rootfiles/rootfile element.
+ *
+ * @param z        opened zip archive
+ * @param opf_file [out] path of the OPF file inside the archive; allocated
+ *                 by libxml2, release with xmlFree(). Set to NULL on failure.
+ * @return TRUE only when a path was found, FALSE otherwise.
+ */
+static gboolean __media_svc_get_epub_root_file(zip_t *z, char **opf_file)
+{
+       gchar *buf = NULL;
+       xmlDocPtr doc = NULL;
+       xmlNodePtr node = NULL;
+
+       media_svc_retvm_if(!z, FALSE, "z is NULL");
+       media_svc_retvm_if(!opf_file, FALSE, "opf_file is NULL");
+
+       *opf_file = NULL;
+
+       buf = __media_svc_get_zipfile_data(z, "META-INF/container.xml");
+       media_svc_retvm_if(!buf, FALSE, "buf is NULL");
+
+       doc = xmlParseDoc((const xmlChar *)buf);
+       g_free(buf);
+       media_svc_retvm_if(!doc, FALSE, "doc is NULL");
+
+       node = xmlDocGetRootElement(doc);
+       node = __media_svc_find_node(node, "rootfiles");
+       node = __media_svc_find_node(node, "rootfile");
+
+       if (node)
+               *opf_file = (char *)xmlGetProp(node, (const xmlChar *)"full-path");
+
+       xmlFreeDoc(doc);
+
+       /* A container without rootfile/full-path is a failure, not a success with a NULL path. */
+       media_svc_retvm_if(!*opf_file, FALSE, "full-path is not found");
+       media_svc_sec_debug("OPF [%s]", *opf_file);
+
+       return TRUE;
+}
+
+/*
+ * Parse an XML metadata document and fill the book fields of
+ * content_info->media_meta (title, composer, copyright, recorded_date,
+ * genre, description).
+ *
+ * @param buffer       NUL-terminated XML text
+ * @param is_pdf       when TRUE, a missing title is treated as failure and
+ *                     no other field is read
+ * @param content_info destination of the extracted values
+ * @return TRUE when the document was parsed and the fields were read,
+ *         FALSE otherwise.
+ */
+static gboolean __media_svc_get_xml_metadata(const xmlChar *buffer, gboolean is_pdf, media_svc_content_info_s *content_info)
+{
+       gboolean parsed = FALSE;
+       xmlDocPtr xml = NULL;
+       xmlNodePtr top = NULL;
+
+       media_svc_retvm_if(!buffer, FALSE, "buffer is NULL");
+       media_svc_retvm_if(!content_info, FALSE, "content_info is NULL");
+
+       xml = xmlParseDoc(buffer);
+       media_svc_retv_if(!xml, FALSE);
+
+       top = xmlDocGetRootElement(xml);
+       if (!top)
+               goto DONE;
+
+       content_info->media_meta.title = __media_svc_find_and_get_value(top, "title");
+       if (is_pdf && !content_info->media_meta.title)
+               goto DONE;
+
+       /* "creator" is preferred; "author" is the fallback for the same field. */
+       content_info->media_meta.composer = __media_svc_find_and_get_value(top, "creator");
+       if (!content_info->media_meta.composer)
+               content_info->media_meta.composer = __media_svc_find_and_get_value(top, "author");
+
+       content_info->media_meta.copyright = __media_svc_find_and_get_value(top, "publisher");
+       content_info->media_meta.recorded_date = __media_svc_find_and_get_value(top, "date");
+       content_info->media_meta.genre = __media_svc_find_and_get_value(top, "subject");
+       content_info->media_meta.description = __media_svc_find_and_get_value(top, "description");
+
+       parsed = TRUE;
+
+DONE:
+       xmlFreeDoc(xml);
+
+       return parsed;
+}
+
+/*
+ * Extract book metadata from an EPUB file.
+ *
+ * Opens content_info->path as a zip archive, locates the OPF package
+ * document through META-INF/container.xml and parses it into
+ * content_info->media_meta.
+ *
+ * @return MS_MEDIA_ERR_INVALID_PARAMETER when content_info is NULL,
+ *         MS_MEDIA_ERR_INTERNAL when the archive or the OPF file cannot be
+ *         read, MS_MEDIA_ERR_NONE otherwise. A failure while parsing the
+ *         OPF XML is only logged and still yields MS_MEDIA_ERR_NONE.
+ */
+static int __media_svc_get_epub_metadata(media_svc_content_info_s *content_info)
+{
+       int err = 0;
+       zip_t *z = NULL;
+       gchar *buf = NULL;
+       char *opf_path = NULL;
+
+       media_svc_retvm_if(!content_info, MS_MEDIA_ERR_INVALID_PARAMETER, "content_info is NULL");
+
+       //1. open epub
+       /* zip_open() signals failure by returning NULL; 'err' then holds a libzip error code. */
+       z = zip_open(content_info->path, ZIP_RDONLY, &err);
+       media_svc_retvm_if(!z, MS_MEDIA_ERR_INTERNAL, "zip_open failed [%d]", err);
+
+       //2. find and read opf file
+       if (!__media_svc_get_epub_root_file(z, &opf_path)) {
+               media_svc_error("__media_svc_get_epub_root_file failed");
+               zip_close(z);
+               return MS_MEDIA_ERR_INTERNAL;
+       }
+
+       //3. get metadata
+       buf = __media_svc_get_zipfile_data(z, opf_path);
+       xmlFree(opf_path);
+       zip_close(z);
+       media_svc_retvm_if(!buf, MS_MEDIA_ERR_INTERNAL, "__media_svc_get_zipfile_data failed");
+
+       if (!__media_svc_get_xml_metadata((const xmlChar *)buf, FALSE, content_info))
+               media_svc_error("__media_svc_get_xml_metadata failed");
+
+       g_free(buf);
+
+       return MS_MEDIA_ERR_NONE;
+}
+
+/*
+ * Extract book metadata from the XMP packet embedded in a PDF file.
+ *
+ * The file is scanned in chunks of MEDIA_SVC_PDF_BUF_SIZE bytes for a
+ * "<x:xmpmeta" ... "</x:xmpmeta>" block. Each complete block is handed to
+ * the XML metadata parser; scanning stops at the first block that yields a
+ * title, otherwise it continues after the rejected block.
+ *
+ * @return MS_MEDIA_ERR_INVALID_PARAMETER when content_info is NULL,
+ *         MS_MEDIA_ERR_INTERNAL when the file is smaller than one chunk or
+ *         cannot be opened, MS_MEDIA_ERR_NONE otherwise (also when no
+ *         metadata was found).
+ */
+static int __media_svc_get_pdf_metadata(media_svc_content_info_s *content_info)
+{
+       int fd = 0;
+       int start_pos = 0;
+       int end_pos = 0;
+       int cur_pos = 0;
+       int search_limit = 0;
+       int meta_len = 0;
+       gboolean parsed = FALSE;
+       char tmp[MEDIA_SVC_PDF_BUF_SIZE + 1] = {0, };
+       gchar *meta_buf = NULL;
+       char *found = NULL;
+       char *search_from = NULL;
+
+       media_svc_retvm_if(!content_info, MS_MEDIA_ERR_INVALID_PARAMETER, "content_info is NULL");
+       media_svc_retvm_if(content_info->size < MEDIA_SVC_PDF_BUF_SIZE, MS_MEDIA_ERR_INTERNAL, "file is too small");
+
+       fd = open(content_info->path, O_RDONLY);
+       media_svc_retvm_if(fd < 0, MS_MEDIA_ERR_INTERNAL, "open failed");
+
+       /* NOTE(review): offsets are plain int; confirm that files beyond INT_MAX bytes need no support. */
+       search_limit = content_info->size - MEDIA_SVC_PDF_TAG_TAIL_LEN;
+
+       while (cur_pos <= search_limit) {
+               lseek(fd, cur_pos, SEEK_SET);
+
+               /* tmp has one spare byte that stays 0, so strstr() below is bounded. */
+               if (read(fd, tmp, MEDIA_SVC_PDF_BUF_SIZE) != MEDIA_SVC_PDF_BUF_SIZE) {
+                       media_svc_error("read failed");
+                       break;
+               }
+
+               //1.Find <x:xmpmeta .. </x:xmpmeta> block
+               search_from = tmp;
+               if (start_pos == 0 && (found = strstr(tmp, "<x:xmpmeta"))) {
+                       start_pos = cur_pos + (found - tmp);
+                       /* Look for the closing tag only behind the opening tag, so
+                        * that the block length can never become negative. */
+                       search_from = found;
+               }
+
+               if (start_pos != 0 && (found = strstr(search_from, "</x:xmpmeta>")))
+                       end_pos = cur_pos + (found - tmp) + MEDIA_SVC_PDF_TAG_TAIL_LEN;
+
+               //2.get metadata using xml parser
+               if (start_pos && end_pos) {
+                       meta_len = end_pos - start_pos;
+                       parsed = FALSE;
+
+                       /* The length is derived from file content; do not abort on a huge request. */
+                       meta_buf = g_try_malloc0(meta_len + 1);
+                       if (meta_buf) {
+                               lseek(fd, start_pos, SEEK_SET);
+                               if (read(fd, meta_buf, meta_len) == meta_len)
+                                       parsed = __media_svc_get_xml_metadata((const xmlChar *)meta_buf, TRUE, content_info);
+                       } else {
+                               media_svc_error("allocation failed");
+                       }
+
+                       /* Released on every path, including a failed read. */
+                       g_free(meta_buf);
+                       meta_buf = NULL;
+
+                       if (parsed)
+                               break;
+
+                       start_pos = 0;
+                       end_pos = 0;
+               }
+
+               /* Advance by less than the chunk size: the 16-byte overlap is longer
+                * than either tag, so a tag split across two chunks is still found. */
+               cur_pos += 240;
+       }
+
+       close(fd);
+
+       return MS_MEDIA_ERR_NONE;
+}
+
+/*
+ * Extract book metadata for content_info, choosing the parser by MIME type:
+ * a type ending in "epub+zip" goes to the EPUB reader, everything else is
+ * handled as PDF.
+ *
+ * @return MS_MEDIA_ERR_INVALID_PARAMETER when content_info is NULL,
+ *         otherwise the result of the selected reader.
+ */
+int _media_svc_extract_book_metadata(media_svc_content_info_s *content_info)
+{
+       media_svc_retvm_if(!content_info, MS_MEDIA_ERR_INVALID_PARAMETER, "content info is NULL");
+
+       if (!g_str_has_suffix(content_info->mime_type, "epub+zip"))
+               return __media_svc_get_pdf_metadata(content_info);
+
+       return __media_svc_get_epub_metadata(content_info);
+}
+
 void _media_svc_destroy_content_info(media_svc_content_info_s *content_info)
 {
-       media_svc_retm_if(content_info == NULL, "content info is NULL");
+       media_svc_retm_if(!content_info, "content info is NULL");
 
        /* Delete media_svc_content_info_s */
        g_free(content_info->media_uuid);
index d69874c..a029935 100755 (executable)
@@ -220,6 +220,14 @@ int media_svc_insert_item_bulk(sqlite3 *handle, const char *storage_id, ms_user_
        case MEDIA_SVC_MEDIA_TYPE_MUSIC:
                ret = _media_svc_extract_media_metadata(handle, true, &content_info, uid);
                break;
+       case MEDIA_SVC_MEDIA_TYPE_BOOK:
+               ret = _media_svc_extract_book_metadata(&content_info);
+               /* The 'TITLE' should always be filled in */
+               if (!content_info.media_meta.title || strlen(content_info.media_meta.title) == 0) {
+                       g_free(content_info.media_meta.title);
+                       content_info.media_meta.title = _media_svc_get_title_by_path(content_info.path);
+               }
+               break;
        default:
                /* The 'TITLE' should always be filled in */
                content_info.media_meta.title = _media_svc_get_title_by_path(content_info.path);
@@ -284,6 +292,14 @@ int media_svc_insert_item_immediately(sqlite3 *handle, const char *storage_id, m
        case MEDIA_SVC_MEDIA_TYPE_MUSIC:
                ret = _media_svc_extract_media_metadata(handle, false, &content_info, uid);
                break;
+       case MEDIA_SVC_MEDIA_TYPE_BOOK:
+               ret = _media_svc_extract_book_metadata(&content_info);
+               /* The 'TITLE' should always be filled in */
+               if (!content_info.media_meta.title || strlen(content_info.media_meta.title) == 0) {
+                       g_free(content_info.media_meta.title);
+                       content_info.media_meta.title = _media_svc_get_title_by_path(content_info.path);
+               }
+               break;
        default:
                /* The 'TITLE' should always be filled in */
                content_info.media_meta.title = _media_svc_get_title_by_path(content_info.path);
@@ -495,6 +511,14 @@ int media_svc_refresh_item(sqlite3 *handle, bool is_direct, const char *storage_
        case MEDIA_SVC_MEDIA_TYPE_MUSIC:
                ret = _media_svc_extract_media_metadata(handle, is_direct, &content_info, uid);
                break;
+       case MEDIA_SVC_MEDIA_TYPE_BOOK:
+               ret = _media_svc_extract_book_metadata(&content_info);
+               /* The 'TITLE' should always be filled in */
+               if (!content_info.media_meta.title || strlen(content_info.media_meta.title) == 0) {
+                       g_free(content_info.media_meta.title);
+                       content_info.media_meta.title = _media_svc_get_title_by_path(content_info.path);
+               }
+               break;
        default:
                /* The 'TITLE' should always be filled in */
                content_info.media_meta.title = _media_svc_get_title_by_path(content_info.path);
index 0cf68fd..987ea21 100755 (executable)
@@ -123,6 +123,7 @@ typedef enum {
        MEDIA_SVC_MEDIA_TYPE_SOUND      = 2,    /**< Sound Content like Ringtone*/
        MEDIA_SVC_MEDIA_TYPE_MUSIC      = 3,    /**< Music Content like mp3*/
        MEDIA_SVC_MEDIA_TYPE_OTHER      = 4,    /**< Not media Content*/
+       MEDIA_SVC_MEDIA_TYPE_BOOK       = 5,    /**< Book Content like epub*/
 } media_svc_media_type_e;
 
 char * _media_info_generate_uuid(void);
@@ -133,6 +134,7 @@ char * _media_svc_get_title_by_path(const char *path);
 int _media_svc_set_media_info(media_svc_content_info_s *content_info, const char *storage_id, ms_user_storage_type_e storage_type, const char *path, bool refresh);
 int _media_svc_extract_image_metadata(media_svc_content_info_s *content_info);
 int _media_svc_extract_media_metadata(sqlite3 *handle, bool is_direct, media_svc_content_info_s *content_info, uid_t uid);
+int _media_svc_extract_book_metadata(media_svc_content_info_s *content_info);
 void _media_svc_destroy_content_info(media_svc_content_info_s *content_info);
 int _media_svc_create_thumbnail(const char *path, char *thumb_path, media_svc_media_type_e media_type, uid_t uid);
 int _media_svc_get_pinyin_str(const char *src_str, char **pinyin_str);