1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
7 #include "../../include/fpdfapi/fpdf_parser.h"
8 #include "../../include/fpdfapi/fpdf_page.h"
9 #include "../../include/fpdfdoc/fpdf_tagged.h"
10 #include "tagged_int.h"
// Depth limit shared by the recursive helpers in this file: AddPageNode(),
// FindAttrDict() and GetAttr() each compare their level argument against it.
11 const int nMaxRecursion = 32;
// Returns non-zero only when the document catalog contains a "MarkInfo"
// dictionary and that dictionary's "Marked" entry reads as a non-zero integer.
// NOTE(review): pCatalog is used without a NULL check -- confirm GetRoot()
// cannot return NULL for the documents that reach this point.
12 static FX_BOOL IsTagged(const CPDF_Document* pDoc)
14 CPDF_Dictionary* pCatalog = pDoc->GetRoot();
15 CPDF_Dictionary* pMarkInfo = pCatalog->GetDict(FX_BSTRC("MarkInfo"));
16 return pMarkInfo != NULL && pMarkInfo->GetInteger(FX_BSTRC("Marked"));
// Creates the structure tree for a single page: gated on IsTagged(pDoc), it
// allocates a CPDF_StructTreeImpl for the document and loads the part of the
// tree that belongs to pPageDict through LoadPageTree().
// NOTE(review): presumably bails out without building a tree when the
// document is not tagged -- confirm against the branch body.
18 CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, const CPDF_Dictionary* pPageDict)
20 if (!IsTagged(pDoc)) {
23 CPDF_StructTreeImpl* pTree = FX_NEW CPDF_StructTreeImpl(pDoc);
27 pTree->LoadPageTree(pPageDict);
// Creates the structure tree for a whole document: gated on IsTagged(pDoc),
// it allocates a CPDF_StructTreeImpl bound to pDoc.
// NOTE(review): presumably the tree is then populated with LoadDocTree() and
// returned to the caller -- confirm.
30 CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc)
32 if (!IsTagged(pDoc)) {
35 CPDF_StructTreeImpl* pTree = FX_NEW CPDF_StructTreeImpl(pDoc);
// Caches the catalog's "StructTreeRoot" dictionary in m_pTreeRoot and that
// dictionary's "RoleMap" entry in m_pRoleMap.
// NOTE(review): presumably returns early when there is no StructTreeRoot;
// verify m_pRoleMap is still initialised on that path.
// NOTE(review): GetDict() can evidently yield NULL (hence the m_pTreeRoot
// check), so m_pRoleMap may be NULL when "RoleMap" is absent, while the
// CPDF_StructElementImpl constructor calls m_pRoleMap->GetString()
// unconditionally -- worth guarding.
42 CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc)
44 CPDF_Dictionary* pCatalog = pDoc->GetRoot();
45 m_pTreeRoot = pCatalog->GetDict(FX_BSTRC("StructTreeRoot"));
46 if (m_pTreeRoot == NULL) {
49 m_pRoleMap = m_pTreeRoot->GetDict(FX_BSTRC("RoleMap"));
// Walks every entry of m_Kids on destruction.
// NOTE(review): presumably each non-NULL kid is Release()d in the loop body
// (m_Kids slots hold Retain()ed elements in AddTopLevelNode) -- confirm.
51 CPDF_StructTreeImpl::~CPDF_StructTreeImpl()
53 for (int i = 0; i < m_Kids.GetSize(); i ++)
// Fills m_Kids with one CPDF_StructElementImpl per top-level kid found under
// the tree root's "K" entry. A dictionary value yields a single element; an
// array value yields one element per array index. Each element is created
// with this tree as owner and no parent element.
58 void CPDF_StructTreeImpl::LoadDocTree()
61 if (m_pTreeRoot == NULL) {
64 CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
// Single top-level element stored directly as a dictionary.
68 if (pKids->GetType() == PDFOBJ_DICTIONARY) {
69 CPDF_StructElementImpl* pStructElementImpl = FX_NEW CPDF_StructElementImpl(this, NULL, (CPDF_Dictionary*)pKids);
70 if (pStructElementImpl == NULL) {
73 m_Kids.Add(pStructElementImpl);
// Anything that is neither a dictionary nor an array is not handled below.
76 if (pKids->GetType() != PDFOBJ_ARRAY) {
79 CPDF_Array* pArray = (CPDF_Array*)pKids;
80 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
// NOTE(review): pKid is handed to the constructor unchecked, and the
// constructor calls pDict->GetString(); LoadPageTree() does test the result
// of the same GetDict(i) call for NULL, so a NULL here looks possible.
81 CPDF_Dictionary* pKid = pArray->GetDict(i);
82 CPDF_StructElementImpl* pStructElementImpl = FX_NEW CPDF_StructElementImpl(this, NULL, pKid);
83 if (pStructElementImpl == NULL) {
86 m_Kids.Add(pStructElementImpl);
// Builds the part of the structure tree that refers to one page.
// Step 1: sizes m_Kids to the number of top-level kids under the root's "K"
// entry (the array count when "K" is an array) and visits every slot.
// Step 2: reads the page's "StructParents" number (default -1), looks it up
// in the root's "ParentTree" number tree, and for every dictionary in the
// resulting array calls AddPageNode() so the element and its ancestors get
// created and linked. element_map is shared across those calls so that each
// dictionary maps to a single element.
// NOTE(review): presumably the slots are cleared to NULL in the first loop
// and a dictionary "K" counts as one kid -- confirm.
89 void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict)
92 if (m_pTreeRoot == NULL) {
95 CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
100 if (pKids->GetType() == PDFOBJ_DICTIONARY) {
102 } else if (pKids->GetType() == PDFOBJ_ARRAY) {
103 dwKids = ((CPDF_Array*)pKids)->GetCount();
108 m_Kids.SetSize(dwKids);
109 for (i = 0; i < dwKids; i ++) {
112 CFX_MapPtrToPtr element_map;
113 CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict(FX_BSTRC("ParentTree"));
114 if (pParentTree == NULL) {
117 CPDF_NumberTree parent_tree(pParentTree);
// A negative id means the page dictionary carries no usable "StructParents".
118 int parents_id = pPageDict->GetInteger(FX_BSTRC("StructParents"), -1);
119 if (parents_id >= 0) {
120 CPDF_Object* pParents = parent_tree.LookupValue(parents_id);
// Only an array value is processed further.
121 if (pParents == NULL || pParents->GetType() != PDFOBJ_ARRAY) {
124 CPDF_Array* pParentArray = (CPDF_Array*)pParents;
125 for (i = 0; i < pParentArray->GetCount(); i ++) {
126 CPDF_Dictionary* pParent = pParentArray->GetDict(i);
127 if (pParent == NULL) {
130 AddPageNode(pParent, element_map);
// Ensures a CPDF_StructElementImpl exists for pDict and is hooked into the
// tree, creating ancestors on demand.
//   pDict  - structure element dictionary to materialise.
//   map    - dictionary -> element cache; consulted first and updated when a
//            new element is created, entry removed again on the failure paths.
//   nLevel - current recursion depth, checked against nMaxRecursion.
// An element whose "P" entry is missing, or whose parent has "Type" equal to
// "StructTreeRoot", is attached through AddTopLevelNode(). Otherwise the
// parent is resolved recursively (nLevel + 1) and the new element is stored,
// Retain()ed, in the parent's kid slot whose dictionary is pDict.
134 CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, CFX_MapPtrToPtr& map, int nLevel)
136 if (nLevel > nMaxRecursion) {
139 CPDF_StructElementImpl* pElement = NULL;
// Reuse an element that was already created for this dictionary.
140 if (map.Lookup(pDict, (FX_LPVOID&)pElement)) {
143 pElement = FX_NEW CPDF_StructElementImpl(this, NULL, pDict);
144 if (pElement == NULL) {
147 map.SetAt(pDict, pElement);
148 CPDF_Dictionary* pParent = pDict->GetDict(FX_BSTRC("P"));
149 if (pParent == NULL || pParent->GetString(FX_BSTRC("Type")) == FX_BSTRC("StructTreeRoot")) {
150 if (!AddTopLevelNode(pDict, pElement)) {
// Attaching at the top level failed: forget the cached element.
152 map.RemoveKey(pDict);
// NOTE(review): pParentElement is dereferenced below without a NULL check;
// the recursive call looks able to fail (depth limit, allocation failure,
// unattachable parent) -- confirm and guard.
155 CPDF_StructElementImpl* pParentElement = AddPageNode(pParent, map, nLevel + 1);
156 FX_BOOL bSave = FALSE;
157 for (int i = 0; i < pParentElement->m_Kids.GetSize(); i ++) {
// Only kids of type Element whose dictionary is pDict are candidates.
158 if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) {
161 if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) {
164 pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain();
// NOTE(review): presumably reached only when no parent slot took the element
// (bSave still FALSE) -- confirm.
169 map.RemoveKey(pDict);
// Places pElement into the m_Kids slot that corresponds to pDict's position
// under the tree root's "K" entry.
//   - "K" is a dictionary: matched by comparing object numbers; slot 0 is used.
//   - "K" is an array: entry i must be a reference whose referenced object
//     number equals pDict's; slot i is used.
// The previous occupant of the slot is Release()d and the new element is
// stored Retain()ed.
// NOTE(review): presumably the Release() calls are guarded by a non-NULL
// check and the result reports whether a slot was filled -- confirm.
174 FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, CPDF_StructElementImpl* pElement)
176 CPDF_Object *pObj = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
180 if (pObj->GetType() == PDFOBJ_DICTIONARY) {
181 if (pObj->GetObjNum() == pDict->GetObjNum()) {
183 m_Kids[0]->Release();
185 m_Kids[0] = pElement->Retain();
190 if (pObj->GetType() == PDFOBJ_ARRAY) {
191 CPDF_Array* pTopKids = (CPDF_Array*)pObj;
193 FX_BOOL bSave = FALSE;
194 for (i = 0; i < pTopKids->GetCount(); i ++) {
// GetElement() (not GetElementValue()) is used so the raw reference and its
// target object number can be inspected.
195 CPDF_Object* pKidRef = pTopKids->GetElement(i);
196 if (pKidRef == NULL || pKidRef->GetType() != PDFOBJ_REFERENCE) {
199 if (((CPDF_Reference*) pKidRef)->GetRefObjNum() != pDict->GetObjNum()) {
203 m_Kids[i]->Release();
205 m_Kids[i] = pElement->Retain();
// Initialises an element from its dictionary: m_Type starts as the "S" entry
// and is then looked up in the owning tree's role map.
// NOTE(review): presumably a non-empty mapping replaces m_Type -- confirm.
// NOTE(review): pTree->m_pRoleMap is dereferenced unconditionally; it comes
// from GetDict("RoleMap") in the tree constructor and may be NULL when the
// entry is absent. pDict is likewise used without a NULL check.
214 CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, CPDF_StructElementImpl* pParent, CPDF_Dictionary* pDict)
219 m_Type = pDict->GetString(FX_BSTRC("S"));
220 CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type);
221 if (!mapped.IsEmpty()) {
// Drops this element's reference on every child element: each kid of type
// Element that has a non-NULL m_pElement is Release()d.
227 CPDF_StructElementImpl::~CPDF_StructElementImpl()
229 for (int i = 0; i < m_Kids.GetSize(); i ++) {
230 if (m_Kids[i].m_Type == CPDF_StructKid::Element && m_Kids[i].m_Element.m_pElement) {
231 ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release();
// Counterpart of Release(). Callers in this file store its return value as
// the element pointer (e.g. "m_Kids[i] = pElement->Retain()").
// NOTE(review): presumably increments m_RefCount and returns this -- confirm.
235 CPDF_StructElementImpl* CPDF_StructElementImpl::Retain()
// Decrements m_RefCount and takes the guarded action once the count has
// dropped below 1.
// NOTE(review): presumably that action destroys the element -- confirm.
240 void CPDF_StructElementImpl::Release()
242 if(--m_RefCount < 1) {
// Populates m_Kids from the element dictionary's "K" entry.
// The object number of the "Pg" reference (0 when "Pg" is missing or not a
// reference) is passed to LoadKid() as the page for each kid. An array "K"
// produces one kid per array element; any other value is loaded as a single
// kid into m_Kids[0].
// NOTE(review): presumably a NULL "K" returns early and m_Kids is sized to 1
// before the single-kid call -- confirm.
246 void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict)
// GetElement() keeps the reference object so its target number can be read.
248 CPDF_Object* pObj = pDict->GetElement(FX_BSTRC("Pg"));
249 FX_DWORD PageObjNum = 0;
250 if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
251 PageObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
253 CPDF_Object* pKids = pDict->GetElementValue(FX_BSTRC("K"));
257 if (pKids->GetType() == PDFOBJ_ARRAY) {
258 CPDF_Array* pArray = (CPDF_Array*)pKids;
259 m_Kids.SetSize(pArray->GetCount());
260 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
261 CPDF_Object* pKid = pArray->GetElementValue(i);
262 LoadKid(PageObjNum, pKid, &m_Kids[i]);
266 LoadKid(PageObjNum, pKids, &m_Kids[0]);
// Classifies one entry of an element's "K" value and fills *pKid.
//   PageObjNum - object number of the page taken from the enclosing "Pg"
//                entry; overridden by the kid dictionary's own "Pg" reference.
//   pKidObj    - the kid object (may be NULL).
//   pKid       - output record; starts as Invalid and stays so unless one of
//                the cases below applies.
// Cases:
//   number            -> PageContent (content id + page object number)
//   dict, Type "MCR"  -> StreamContent ("Stm" reference number or 0, page,
//                        "MCID")
//   dict, Type "OBJR" -> Object ("Obj" reference number or 0, page)
//   any other dict    -> Element (child structure element)
// When the tree has an m_pPage, each content case compares that page's object
// number with PageObjNum.
// NOTE(review): presumably a mismatch returns with the kid left Invalid --
// confirm.
269 void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid)
271 pKid->m_Type = CPDF_StructKid::Invalid;
272 if (pKidObj == NULL) {
// A bare number is recorded as page content on the inherited page.
275 if (pKidObj->GetType() == PDFOBJ_NUMBER) {
276 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
279 pKid->m_Type = CPDF_StructKid::PageContent;
280 pKid->m_PageContent.m_ContentId = pKidObj->GetInteger();
281 pKid->m_PageContent.m_PageObjNum = PageObjNum;
// Everything else must be a dictionary.
284 if (pKidObj->GetType() != PDFOBJ_DICTIONARY) {
287 CPDF_Dictionary* pKidDict = (CPDF_Dictionary*)pKidObj;
// A "Pg" reference on the kid itself takes precedence over the inherited page.
288 CPDF_Object* pPageObj = pKidDict->GetElement(FX_BSTRC("Pg"));
289 if (pPageObj && pPageObj->GetType() == PDFOBJ_REFERENCE) {
290 PageObjNum = ((CPDF_Reference*)pPageObj)->GetRefObjNum();
292 CFX_ByteString type = pKidDict->GetString(FX_BSTRC("Type"));
293 if (type == FX_BSTRC("MCR")) {
294 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
297 pKid->m_Type = CPDF_StructKid::StreamContent;
298 CPDF_Object* pStreamObj = pKidDict->GetElement(FX_BSTRC("Stm"));
299 if (pStreamObj && pStreamObj->GetType() == PDFOBJ_REFERENCE) {
300 pKid->m_StreamContent.m_RefObjNum = ((CPDF_Reference*)pStreamObj)->GetRefObjNum();
302 pKid->m_StreamContent.m_RefObjNum = 0;
304 pKid->m_StreamContent.m_PageObjNum = PageObjNum;
305 pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger(FX_BSTRC("MCID"));
306 } else if (type == FX_BSTRC("OBJR")) {
307 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
310 pKid->m_Type = CPDF_StructKid::Object;
311 CPDF_Object* pObj = pKidDict->GetElement(FX_BSTRC("Obj"));
312 if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
313 pKid->m_Object.m_RefObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
315 pKid->m_Object.m_RefObjNum = 0;
317 pKid->m_Object.m_PageObjNum = PageObjNum;
// Child structure element. Without a tree page the child is built right
// away; with one, the pointer is left NULL here (AddPageNode() is the code
// in this file that later assigns m_Element.m_pElement).
319 pKid->m_Type = CPDF_StructKid::Element;
320 pKid->m_Element.m_pDict = pKidDict;
321 if (m_pTree->m_pPage == NULL) {
322 pKid->m_Element.m_pElement = FX_NEW CPDF_StructElementImpl(m_pTree, this, pKidDict);
324 pKid->m_Element.m_pElement = NULL;
// Searches an attribute object for the dictionary whose "O" entry equals
// owner.
//   pAttrs - a dictionary, a stream (its dictionary is used) or an array
//            whose elements are searched recursively; may be NULL.
//   owner  - value to match against the "O" entry.
//   nLevel - recursion depth, checked against nMaxRecursion.
// NOTE(review): nLevel is declared FX_FLOAT although it is only used as a
// depth counter (incremented by 1, compared with an int); an integer type
// would be the natural choice if the interface can change.
// NOTE(review): presumably returns NULL on every non-matching path and stops
// at the first array element that yields a match -- confirm.
328 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, FX_BSTR owner, FX_FLOAT nLevel = 0.0F)
330 if (nLevel > nMaxRecursion) {
333 if (pAttrs == NULL) {
336 CPDF_Dictionary* pDict = NULL;
337 if (pAttrs->GetType() == PDFOBJ_DICTIONARY) {
338 pDict = (CPDF_Dictionary*)pAttrs;
339 } else if (pAttrs->GetType() == PDFOBJ_STREAM) {
340 pDict = ((CPDF_Stream*)pAttrs)->GetDict();
341 } else if (pAttrs->GetType() == PDFOBJ_ARRAY) {
342 CPDF_Array* pArray = (CPDF_Array*)pAttrs;
343 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
344 CPDF_Object* pElement = pArray->GetElementValue(i);
345 pDict = FindAttrDict(pElement, owner, nLevel + 1);
// Only a dictionary whose owner entry matches is accepted.
351 if (pDict && pDict->GetString(FX_BSTRC("O")) == owner) {
// Looks up attribute "name" belonging to attribute owner "owner" for this
// element.
//   bInheritable - when set, the lookup may continue on m_pParent.
//   fLevel       - recursion depth, checked against nMaxRecursion.
// Visible flow:
//   1. Inheritable lookup: retry on this element with bInheritable == FALSE,
//      then defer to m_pParent with fLevel + 1 (nothing to defer to when
//      m_pParent is NULL).
//   2. Direct lookup: search the element's "A" entry with FindAttrDict() and
//      read "name" from the matching dictionary.
//   3. Class lookup: resolve the class name(s) in "C" (an array of names or a
//      single name) through the tree root's "ClassMap"; a class dictionary is
//      used only if its "O" entry equals owner.
// NOTE(review): presumably each step returns as soon as it finds a value and
// the function yields NULL otherwise -- confirm.
356 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, FX_FLOAT fLevel)
358 if (fLevel > nMaxRecursion) {
362 CPDF_Object* pAttr = GetAttr(owner, name, FALSE);
366 if (m_pParent == NULL) {
369 return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1);
371 CPDF_Object* pA = m_pDict->GetElementValue(FX_BSTRC("A"));
373 CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
375 CPDF_Object* pAttr = pAttrDict->GetElementValue(name);
381 CPDF_Object* pC = m_pDict->GetElementValue(FX_BSTRC("C"));
385 CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict(FX_BSTRC("ClassMap"));
386 if (pClassMap == NULL) {
389 if (pC->GetType() == PDFOBJ_ARRAY) {
390 CPDF_Array* pArray = (CPDF_Array*)pC;
391 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
392 CFX_ByteString class_name = pArray->GetString(i);
393 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
394 if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
395 return pClassDict->GetElementValue(name);
400 CFX_ByteString class_name = pC->GetString();
401 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
402 if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
403 return pClassDict->GetElementValue(name);
// Attribute lookup with optional array indexing.
// Fetches the attribute through the three-argument GetAttr() call. When the
// attribute is missing, subindex is -1, or the attribute is not an array, the
// first guard applies; otherwise subindex selects one array element, with an
// explicit check against the array count.
// NOTE(review): presumably the first guard returns pAttr unchanged and an
// out-of-range subindex returns NULL -- confirm.
// NOTE(review): only -1 is treated as "no subindex"; other negative values
// reach GetElementValue() -- confirm it rejects them.
407 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, int subindex)
409 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
410 if (pAttr == NULL || subindex == -1 || pAttr->GetType() != PDFOBJ_ARRAY) {
413 CPDF_Array* pArray = (CPDF_Array*)pAttr;
414 if (subindex >= (int)pArray->GetCount()) {
417 return pArray->GetElementValue(subindex);
// Returns the attribute as a string when it exists and is a PDF name object;
// otherwise returns default_value. owner, name, bInheritable and subindex are
// forwarded unchanged to GetAttr().
419 CFX_ByteString CPDF_StructElementImpl::GetName(FX_BSTR owner, FX_BSTR name, FX_BSTR default_value, FX_BOOL bInheritable, int subindex)
421 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
422 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NAME) {
423 return default_value;
425 return pAttr->GetString();
// Returns the attribute as an opaque ARGB value when it exists and is an
// array; otherwise returns default_value. Array entries 0, 1 and 2 are each
// scaled by 255, truncated to int and packed into bits 16-23, 8-15 and 0-7;
// the top byte is forced to 0xff.
// NOTE(review): assumes each component lies in [0, 1]. Nothing clamps the
// scaled values, so an out-of-range component would spill into neighbouring
// channels -- consider clamping to 0..255.
427 FX_ARGB CPDF_StructElementImpl::GetColor(FX_BSTR owner, FX_BSTR name, FX_ARGB default_value, FX_BOOL bInheritable, int subindex)
429 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
430 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_ARRAY) {
431 return default_value;
433 CPDF_Array* pArray = (CPDF_Array*)pAttr;
434 return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) | ((int)(pArray->GetNumber(1) * 255) << 8) | (int)(pArray->GetNumber(2) * 255);
// Returns the attribute's numeric value when it exists and is a PDF number
// object; otherwise returns default_value. owner, name, bInheritable and
// subindex are forwarded unchanged to GetAttr().
436 FX_FLOAT CPDF_StructElementImpl::GetNumber(FX_BSTR owner, FX_BSTR name, FX_FLOAT default_value, FX_BOOL bInheritable, int subindex)
438 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
439 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
440 return default_value;
442 return pAttr->GetNumber();
// Returns the attribute's integer value when it exists and is a PDF number
// object; otherwise returns default_value. owner, name, bInheritable and
// subindex are forwarded unchanged to GetAttr().
444 int CPDF_StructElementImpl::GetInteger(FX_BSTR owner, FX_BSTR name, int default_value, FX_BOOL bInheritable, int subindex)
446 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
447 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
448 return default_value;
450 return pAttr->GetInteger();