Refactoring ebook plugin
[platform/core/multimedia/libmedia-service.git] / plugin / media-ebook-plugin-pdf.cpp
1 /*
2  * libmedia-service
3  *
4  * Copyright (c) 2022 Samsung Electronics Co., Ltd. All rights reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */
19
20 #include "media-ebook-plugin-pdf.h"
21
22 #include <dlog.h>
23 #include <stack>
24
25 #ifdef LOG_TAG
26 #undef LOG_TAG
27 #endif
28
29 #define LOG_TAG "MEDIA_SERVICE"
30
31 using namespace EBook;
32
33 Pdf::Pdf(std::string path, std::unique_ptr<IRunnable> runner)
34         : runner(std::move(runner))
35 {
36         if (path.empty()) {
37                 LOGE("invalid path");
38                 return;
39         }
40
41         LOGD("%s", path.c_str());
42
43         try {
44                 pdf.Load(path.c_str());
45                 loaded = true;
46         } catch (const PoDoFo::PdfError& e) {
47                 LOGE("Initialization failed : %s", e.what());
48         }
49 }
50
51 bool Pdf::find()
52 {
53         if (!loaded)
54                 return false;
55
56         for (int n = 0; n < pdf.GetPageCount(); ++n)
57                 if (runner->run(parseTextFromPage(n)))
58                         return true;
59
60         return false;
61 }
62
63 void Pdf::insert()
64 {
65         if (!loaded)
66                 return;
67
68         for (int n = 0; n < pdf.GetPageCount(); ++n)
69                 runner->run(parseTextFromPage(n));
70 }
71
72 std::string Pdf::parseTextFromPage(unsigned int index)
73 {
74         std::string fullText;
75
76         PoDoFo::EPdfContentsType type;
77         PoDoFo::PdfVariant var;
78         PoDoFo::PdfFont *cur_font = NULL;
79         bool text_block = false;
80         const char* tok;
81         std::stack<PoDoFo::PdfVariant> stack;
82         PoDoFo::PdfString unicode;
83         PoDoFo::PdfArray array;
84
85         PoDoFo::PdfPage* page = pdf.GetPage(index);
86         if (!page)
87                 return fullText;
88
89         PoDoFo::PdfContentsTokenizer tokenizer(page);
90
91         while (tokenizer.ReadNext(type, tok, var)) {
92                 if (type != PoDoFo::ePdfContentsType_Keyword) {
93                         if (text_block)
94                                 stack.push(var);
95
96                         continue;
97                 }
98
99                 if (!text_block && strcmp(tok, "BT") == 0) {
100                         text_block = true;
101                         continue;
102                 } else if (text_block && strcmp(tok, "ET") == 0) {
103                         text_block = false;
104                 }
105
106                 if (!text_block)
107                         continue;
108
109                 if (strcmp(tok, "Tf") == 0) {
110                         if (stack.size() < 2) {
111                                 cur_font = NULL;
112                                 continue;
113                         }
114
115                         stack.pop();
116                         cur_font = pdf.GetFont(page->GetFromResources(PoDoFo::PdfName("Font"), stack.top().GetName()));
117                 } else if (strcmp(tok, "Tj") == 0 || strcmp(tok, "'") == 0 || strcmp(tok, "\"") == 0) {
118                         if (stack.empty())
119                                 continue;
120
121                         if (!cur_font || !cur_font->GetEncoding())
122                                 continue;
123
124                         unicode = cur_font->GetEncoding()->ConvertToUnicode(stack.top().GetString(), cur_font);
125                         fullText += unicode.GetStringUtf8();
126
127                         stack.pop();
128                 } else if (strcmp(tok, "TJ") == 0) {
129                         if (stack.empty())
130                                 continue;
131
132                         array = stack.top().GetArray();
133                         stack.pop();
134
135                         for (int i = 0; i < static_cast<int>(array.GetSize()); i++) {
136                                 if (array[i].IsString() || array[i].IsHexString()) {
137                                         if (!cur_font || !cur_font->GetEncoding())
138                                                 continue;
139
140                                         unicode = cur_font->GetEncoding()->ConvertToUnicode(array[i].GetString(), cur_font);
141                                         fullText += unicode.GetStringUtf8();
142                                 }
143                         }
144                 }
145         }
146
147         return fullText;
148 }