f9fc74b0852537939cee4407eb05da309b07d115
[platform/core/multimedia/libmedia-service.git] / plugin / media-ebook-plugin-pdf.cpp
1 /*
2  * libmedia-service
3  *
4  * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */
19
20 #include "media-ebook-plugin-pdf.h"
21
22 #include <dlog.h>
23 #include <stack>
24
25 #ifdef LOG_TAG
26 #undef LOG_TAG
27 #endif
28
29 #define LOG_TAG "MEDIA_SERVICE"
30
31 using namespace EBook;
32
33 Pdf::Pdf(std::string path, std::unique_ptr<IRunnable> runner)
34         : runner(std::move(runner))
35 {
36         if (path.empty()) {
37                 LOGE("invalid path");
38                 return;
39         }
40
41         LOGD("%s", path.c_str());
42
43         try {
44                 PoDoFo::PdfError::EnableLogging(false);
45
46                 pdf.Load(path.c_str());
47                 loaded = true;
48         } catch (const PoDoFo::PdfError& e) {
49                 LOGE("Initialization failed : %s", e.what());
50         }
51 }
52
53 bool Pdf::find()
54 {
55         if (!loaded)
56                 return false;
57
58         for (int n = 0; n < pdf.GetPageCount(); ++n)
59                 if (runner->run(parseTextFromPage(n)))
60                         return true;
61
62         return false;
63 }
64
65 void Pdf::insert()
66 {
67         if (!loaded)
68                 return;
69
70         for (int n = 0; n < pdf.GetPageCount(); ++n)
71                 runner->run(parseTextFromPage(n));
72 }
73
74 std::string Pdf::parseTextFromPage(unsigned int index)
75 {
76         std::string fullText;
77
78         PoDoFo::EPdfContentsType type;
79         PoDoFo::PdfVariant var;
80         PoDoFo::PdfFont *cur_font = NULL;
81         bool text_block = false;
82         const char* tok;
83         std::stack<PoDoFo::PdfVariant> stack;
84         PoDoFo::PdfString unicode;
85         PoDoFo::PdfArray array;
86
87         PoDoFo::PdfPage* page = pdf.GetPage(index);
88         if (!page)
89                 return fullText;
90
91         PoDoFo::PdfContentsTokenizer tokenizer(page);
92
93         while (tokenizer.ReadNext(type, tok, var)) {
94                 if (type != PoDoFo::ePdfContentsType_Keyword) {
95                         if (text_block)
96                                 stack.push(var);
97
98                         continue;
99                 }
100
101                 if (!text_block && strcmp(tok, "BT") == 0) {
102                         text_block = true;
103                         continue;
104                 } else if (text_block && strcmp(tok, "ET") == 0) {
105                         text_block = false;
106                 }
107
108                 if (!text_block)
109                         continue;
110
111                 if (strcmp(tok, "Tf") == 0) {
112                         if (stack.size() < 2) {
113                                 cur_font = NULL;
114                                 continue;
115                         }
116
117                         stack.pop();
118                         cur_font = pdf.GetFont(page->GetFromResources(PoDoFo::PdfName("Font"), stack.top().GetName()));
119                 } else if (strcmp(tok, "Tj") == 0 || strcmp(tok, "'") == 0 || strcmp(tok, "\"") == 0) {
120                         if (stack.empty())
121                                 continue;
122
123                         if (!cur_font || !cur_font->GetEncoding())
124                                 continue;
125
126                         unicode = cur_font->GetEncoding()->ConvertToUnicode(stack.top().GetString(), cur_font);
127                         fullText += unicode.GetStringUtf8();
128
129                         stack.pop();
130                 } else if (strcmp(tok, "TJ") == 0) {
131                         if (stack.empty())
132                                 continue;
133
134                         array = stack.top().GetArray();
135                         stack.pop();
136
137                         for (int i = 0; i < static_cast<int>(array.GetSize()); i++) {
138                                 if (array[i].IsString() || array[i].IsHexString()) {
139                                         if (!cur_font || !cur_font->GetEncoding())
140                                                 continue;
141
142                                         unicode = cur_font->GetEncoding()->ConvertToUnicode(array[i].GetString(), cur_font);
143                                         fullText += unicode.GetStringUtf8();
144                                 }
145                         }
146                 }
147         }
148
149         return fullText;
150 }