1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
7 #include "../../../include/fpdfapi/fpdf_parser.h"
8 #include "../../../include/fpdfapi/fpdf_module.h"
9 #include "../../../include/fpdfapi/fpdf_page.h"
10 #include "../fpdf_page/pageint.h"
// Numeric limit (64) named after a parser "object level"; it is not referenced
// in the visible part of this file — presumably a nesting-depth cap, TODO confirm.
12 #define _PARSER_OBJECT_LEVLE_ 64
// Per-byte character-class table defined elsewhere; the code in this file
// indexes it with a byte value and compares the entry against 'W' and 'D'.
13 extern const FX_LPCSTR _PDF_CharType;
// Inspects pDict to decide whether it is a signature dictionary: the visible
// statements look up the "Type" entry, look up the "FT" entry, and compare the
// string value of the selected entry against "Sig".
// NOTE(review): the conditions guarding the "FT" lookup and all return
// statements are not visible in this listing — presumably "FT" is only used
// when "Type" is absent; confirm against the full source.
14 FX_BOOL IsSignatureDict(const CPDF_Dictionary* pDict)
16 CPDF_Object* pType = pDict->GetElementValue(FX_BSTRC("Type"));
18 pType = pDict->GetElementValue(FX_BSTRC("FT"));
23 if (pType->GetString() == FX_BSTRC("Sig")) {
28 static FX_INT32 _CompareDWord(const void* p1, const void* p2)
30 return (*(FX_DWORD*)p1) - (*(FX_DWORD*)p2);
// Comparator over two FX_FILESIZE values passed by address; it is handed to
// FXSYS_bsearch and FXSYS_qsort throughout this file for m_SortedOffset.
// The visible statement computes the difference *p1 - *p2; how that
// difference is mapped to the int result is not visible in this listing.
32 static int _CompareFileSize(const void* p1, const void* p2)
34 FX_FILESIZE ret = (*(FX_FILESIZE*)p1) - (*(FX_FILESIZE*)p2);
// Constructor: puts the parser into its initial state. The visible
// assignments clear the encryption dictionary and security handler pointers,
// reset the xref start object number, mark the file reader as owned by the
// parser, and turn off forced use of an externally supplied security handler.
// NOTE(review): other member initialisations are not visible in this listing.
43 CPDF_Parser::CPDF_Parser()
47 m_pEncryptDict = NULL;
48 m_pSecurityHandler = NULL;
51 m_dwXrefStartObjNum = 0;
52 m_bOwnFileRead = TRUE;
53 m_bForceUseSecurityHandler = FALSE;
// Destructor. NOTE(review): the body is not visible in this listing —
// presumably it tears the parser down via CloseParser; confirm.
55 CPDF_Parser::~CPDF_Parser()
59 FX_DWORD CPDF_Parser::GetLastObjNum()
61 FX_DWORD dwSize = m_CrossRef.GetSize();
62 return dwSize ? dwSize - 1 : 0;
64 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict)
66 m_pEncryptDict = pDict;
// Resets the parser so it can be destroyed or reused: releases the trailer(s),
// the encryption handler, the file access object (when owned), the cached
// object streams and the cross-reference bookkeeping arrays.
// bReParse: when TRUE the branch guarded by `m_pDocument && !bReParse` is
// skipped (the body of that branch is not visible in this listing).
// NOTE(review): several guards (e.g. around the m_pTrailer and m_pLinearized
// releases) and the loop header over m_ObjectStreamMap are hidden here.
68 void CPDF_Parser::CloseParser(FX_BOOL bReParse)
70 m_bVersionUpdated = FALSE;
71 if (m_pDocument && !bReParse) {
76 m_pTrailer->Release();
// Drop crypto/security handlers and forget the encryption dictionary.
79 ReleaseEncryptHandler();
80 SetEncryptDictionary(NULL);
// The file reader is only released when this parser owns it.
81 if (m_bOwnFileRead && m_Syntax.m_pFileAccess != NULL) {
82 m_Syntax.m_pFileAccess->Release();
83 m_Syntax.m_pFileAccess = NULL;
// Walk the object-stream cache; what is done with each pStream is hidden.
85 FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
88 CPDF_StreamAcc* pStream;
89 m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
// Empty every lookup structure built while loading cross references.
92 m_ObjectStreamMap.RemoveAll();
93 m_SortedOffset.RemoveAll();
94 m_CrossRef.RemoveAll();
96 m_ObjVersion.RemoveAll();
// Release each additional trailer collected in m_Trailers, then clear the list.
97 FX_INT32 iLen = m_Trailers.GetSize();
98 for (FX_INT32 i = 0; i < iLen; ++i) {
99 m_Trailers.GetAt(i)->Release();
101 m_Trailers.RemoveAll();
103 m_pLinearized->Release();
104 m_pLinearized = NULL;
// Locates the PDF header tag inside pFile by reading 4 bytes at `offset` and
// comparing them with `tag`.
// NOTE(review): the declarations of `buf`/`offset`, the surrounding loop and
// the return values are not visible in this listing.
107 static FX_INT32 GetHeaderOffset(IFX_FileRead* pFile)
// 0x46445025 stored least-significant byte first is 25 50 44 46, i.e. "%PDF".
109 FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
113 if (!pFile->ReadBlock(buf, offset, 4)) {
116 if (*(FX_DWORD*)buf == tag) {
// Convenience overload: opens `filename` (narrow string) with
// FX_CreateFileRead and forwards to the IFX_FileRead* overload.
// Returns PDFPARSE_ERROR_FILE on the failure path (its condition is hidden
// in this listing — presumably a NULL reader), otherwise the result of the
// forwarded StartParse call.
126 FX_DWORD CPDF_Parser::StartParse(FX_LPCSTR filename, FX_BOOL bReParse)
128 IFX_FileRead* pFileAccess = FX_CreateFileRead(filename);
130 return PDFPARSE_ERROR_FILE;
132 return StartParse(pFileAccess, bReParse);
// Convenience overload: opens `filename` (wide string) with
// FX_CreateFileRead and forwards to the IFX_FileRead* overload.
// Returns PDFPARSE_ERROR_FILE on the failure path (its condition is hidden
// in this listing — presumably a NULL reader), otherwise the result of the
// forwarded StartParse call.
134 FX_DWORD CPDF_Parser::StartParse(FX_LPCWSTR filename, FX_BOOL bReParse)
136 IFX_FileRead* pFileAccess = FX_CreateFileRead(filename);
138 return PDFPARSE_ERROR_FILE;
140 return StartParse(pFileAccess, bReParse);
// Factory functions for security handlers, defined elsewhere.
// FPDF_CreateStandardSecurityHandler is used by SetEncryptHandler for the
// "Standard" filter; FPDF_CreatePubKeyHandler is not called in the visible code.
142 CPDF_SecurityHandler* FPDF_CreateStandardSecurityHandler();
143 CPDF_SecurityHandler* FPDF_CreatePubKeyHandler(void*);
// Main entry point: parses the PDF exposed by pFileAccess.
//   pFileAccess  - file reader to parse from.
//   bReParse     - forwarded to CloseParser; see the guard at the
//                  `m_pDocument = FX_NEW ...` line (hidden in this listing).
//   bOwnFileRead - whether this parser releases pFileAccess itself.
// Returns PDFPARSE_ERROR_SUCCESS on success, PDFPARSE_ERROR_FORMAT on the
// visible failure paths, or the error reported by SetEncryptHandler
// (the `return dwRet;` lines are presumably among the hidden ones).
// NOTE(review): many guards/closing braces are hidden; comments below describe
// only the visible statements.
144 FX_DWORD CPDF_Parser::StartParse(IFX_FileRead* pFileAccess, FX_BOOL bReParse, FX_BOOL bOwnFileRead)
// Start from a clean state.
146 CloseParser(bReParse);
147 m_bXRefStream = FALSE;
148 m_LastXRefOffset = 0;
149 m_bOwnFileRead = bOwnFileRead;
// Find where the header starts; the failure test on `offset` is hidden.
150 FX_INT32 offset = GetHeaderOffset(pFileAccess);
152 if (bOwnFileRead && pFileAccess) {
153 pFileAccess->Release();
155 return PDFPARSE_ERROR_FORMAT;
157 m_Syntax.InitParser(pFileAccess, offset);
// Version = (char at 5 as digit) * 10 + (char at 7 as digit), e.g. "1.7" -> 17
// assuming those positions hold the major/minor digits of the header.
159 m_Syntax.GetCharAt(5, ch);
160 m_FileVersion = (ch - '0') * 10;
161 m_Syntax.GetCharAt(7, ch);
162 m_FileVersion += ch - '0';
// Position 9 bytes before the end of the data, then look for "startxref".
163 m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9);
165 m_pDocument = FX_NEW CPDF_Document(this);
167 FX_BOOL bXRefRebuilt = FALSE;
168 if (m_Syntax.SearchWord(FX_BSTRC("startxref"), TRUE, FALSE, 4096)) {
// Remember the startxref position in the sorted offset list if it is new.
169 FX_FILESIZE startxref_offset = m_Syntax.SavePos();
170 FX_LPVOID pResult = FXSYS_bsearch(&startxref_offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
171 if (pResult == NULL) {
172 m_SortedOffset.Add(startxref_offset);
// Skip the keyword and read the number that follows it.
174 m_Syntax.GetKeyword();
176 CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(bNumber);
178 return PDFPARSE_ERROR_FORMAT;
180 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
// Try the classic table first, then the xref stream; rebuild if both fail.
181 if (!LoadAllCrossRefV4(m_LastXRefOffset) && !LoadAllCrossRefV5(m_LastXRefOffset)) {
182 if (!RebuildCrossRef()) {
183 return PDFPARSE_ERROR_FORMAT;
186 m_LastXRefOffset = 0;
// Second RebuildCrossRef call: presumably the branch taken when
// "startxref" was not found (its guard is hidden).
189 if (!RebuildCrossRef()) {
190 return PDFPARSE_ERROR_FORMAT;
194 FX_DWORD dwRet = SetEncryptHandler();
195 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
198 m_pDocument->LoadDoc();
// A document without a root or without pages triggers a rebuild attempt;
// the PDFPARSE_ERROR_FORMAT return just below has a hidden guard.
199 if (m_pDocument->GetRoot() == NULL || m_pDocument->GetPageCount() == 0) {
201 return PDFPARSE_ERROR_FORMAT;
203 ReleaseEncryptHandler();
204 if (!RebuildCrossRef()) {
205 return PDFPARSE_ERROR_FORMAT;
207 dwRet = SetEncryptHandler();
208 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
211 m_pDocument->LoadDoc();
212 if (m_pDocument->GetRoot() == NULL) {
213 return PDFPARSE_ERROR_FORMAT;
// Offsets were appended in discovery order; sort them so later bsearch works.
216 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
// A zero root object number leads to another recovery attempt (the call made
// between the two GetRootObjNum reads is hidden in this listing).
217 FX_DWORD RootObjNum = GetRootObjNum();
218 if (RootObjNum == 0) {
219 ReleaseEncryptHandler();
221 RootObjNum = GetRootObjNum();
222 if (RootObjNum == 0) {
223 return PDFPARSE_ERROR_FORMAT;
225 dwRet = SetEncryptHandler();
226 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
// When metadata is not encrypted, tell the syntax parser which object number
// holds it (taken from the root's "Metadata" reference).
230 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
231 CPDF_Reference* pMetadata = (CPDF_Reference*)m_pDocument->GetRoot()->GetElement(FX_BSTRC("Metadata"));
232 if (pMetadata && pMetadata->GetType() == PDFOBJ_REFERENCE) {
233 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
236 return PDFPARSE_ERROR_SUCCESS;
// Locates the trailer's "Encrypt" entry and installs the matching security
// and crypto handlers on the syntax parser.
// Returns PDFPARSE_ERROR_SUCCESS when done (also when nothing needs to be
// installed), PDFPARSE_ERROR_FORMAT when there is no trailer, and
// PDFPARSE_ERROR_HANDLER on the visible handler failures. `err` is set to
// PDFPARSE_ERROR_PASSWORD for the "Standard" filter; the `return err;` lines
// are presumably among those hidden in this listing.
238 FX_DWORD CPDF_Parser::SetEncryptHandler()
// Start with no handler and no encryption dictionary.
240 ReleaseEncryptHandler();
241 SetEncryptDictionary(NULL);
242 if (m_pTrailer == NULL) {
243 return PDFPARSE_ERROR_FORMAT;
// "Encrypt" may be a direct dictionary or a reference to an indirect object.
// The null checks on pEncryptObj are hidden in this listing.
245 CPDF_Object* pEncryptObj = m_pTrailer->GetElement(FX_BSTRC("Encrypt"));
247 if (pEncryptObj->GetType() == PDFOBJ_DICTIONARY) {
248 SetEncryptDictionary((CPDF_Dictionary*)pEncryptObj);
249 } else if (pEncryptObj->GetType() == PDFOBJ_REFERENCE) {
250 pEncryptObj = m_pDocument->GetIndirectObject(((CPDF_Reference*)pEncryptObj)->GetRefObjNum());
252 SetEncryptDictionary(pEncryptObj->GetDict());
// Path 1: a caller-supplied handler is forced; it must already be set.
256 if (m_bForceUseSecurityHandler) {
257 FX_DWORD err = PDFPARSE_ERROR_HANDLER;
258 if (m_pSecurityHandler == NULL) {
259 return PDFPARSE_ERROR_HANDLER;
261 if (!m_pSecurityHandler->OnInit(this, m_pEncryptDict)) {
264 CPDF_CryptoHandler* pCryptoHandler = m_pSecurityHandler->CreateCryptoHandler();
265 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler)) {
266 delete pCryptoHandler;
267 pCryptoHandler = NULL;
268 return PDFPARSE_ERROR_HANDLER;
270 m_Syntax.SetEncrypt(pCryptoHandler);
// Path 2: pick a handler from the dictionary's "Filter" name. Only "Standard"
// creates a handler in the visible code.
271 } else if (m_pEncryptDict) {
272 CFX_ByteString filter = m_pEncryptDict->GetString(FX_BSTRC("Filter"));
273 CPDF_SecurityHandler* pSecurityHandler = NULL;
274 FX_DWORD err = PDFPARSE_ERROR_HANDLER;
275 if (filter == FX_BSTRC("Standard")) {
276 pSecurityHandler = FPDF_CreateStandardSecurityHandler();
277 err = PDFPARSE_ERROR_PASSWORD;
279 if (pSecurityHandler == NULL) {
280 return PDFPARSE_ERROR_HANDLER;
// A handler that fails OnInit is destroyed rather than kept.
282 if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) {
283 delete pSecurityHandler;
284 pSecurityHandler = NULL;
287 m_pSecurityHandler = pSecurityHandler;
288 CPDF_CryptoHandler* pCryptoHandler = pSecurityHandler->CreateCryptoHandler();
289 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler)) {
290 delete pCryptoHandler;
291 pCryptoHandler = NULL;
292 return PDFPARSE_ERROR_HANDLER;
294 m_Syntax.SetEncrypt(pCryptoHandler);
296 return PDFPARSE_ERROR_SUCCESS;
298 void CPDF_Parser::ReleaseEncryptHandler()
300 if (m_Syntax.m_pCryptoHandler) {
301 delete m_Syntax.m_pCryptoHandler;
302 m_Syntax.m_pCryptoHandler = NULL;
304 if (m_pSecurityHandler && !m_bForceUseSecurityHandler) {
305 delete m_pSecurityHandler;
306 m_pSecurityHandler = NULL;
// Returns the file offset recorded for object `objnum`.
// Type 1 entries store the offset directly in m_CrossRef; type 2 entries store
// another object number there, whose own m_CrossRef entry is returned.
// The results for out-of-range numbers and other types are hidden in this
// listing (presumably 0).
309 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum)
311 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
314 if (m_V5Type[objnum] == 1) {
315 return m_CrossRef[objnum];
317 if (m_V5Type[objnum] == 2) {
// NOTE(review): the inner value is used as an index with no visible range
// check against m_CrossRef.GetSize() — confirm it cannot be out of bounds.
318 return m_CrossRef[(FX_INT32)m_CrossRef[objnum]];
// Reads `key` from pDict with GetElement and, when the element itself is a
// number object, returns its integer value.
// The results for a missing key or a non-number element are hidden in this
// listing (presumably 0).
322 static FX_INT32 GetDirectInteger(CPDF_Dictionary* pDict, FX_BSTR key)
324 CPDF_Object* pObj = pDict->GetElement(key);
328 if (pObj->GetType() == PDFOBJ_NUMBER) {
329 return ((CPDF_Number*)pObj)->GetInteger();
// Reports whether the element stored under `key` in pDict has object type
// `iType`. Handling of a missing key is hidden in this listing.
333 static FX_BOOL CheckDirectType(CPDF_Dictionary* pDict, FX_BSTR key, FX_INT32 iType)
335 CPDF_Object* pObj = pDict->GetElement(key);
339 return pObj->GetType() == iType;
// Loads the classic cross-reference table at `xrefpos` together with every
// earlier table reachable through the trailers' "Prev" entries.
// First pass: skip each table (bSkip = TRUE), read its trailer and collect the
// table position plus its "XRefStm" value, newest last. Second pass: load
// every collected table for real.
// NOTE(review): the return statements, the loop over "Prev" and several
// guards are hidden in this listing.
341 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos)
343 if (!LoadCrossRefV4(xrefpos, 0, TRUE, FALSE)) {
346 m_pTrailer = LoadTrailerV4();
347 if (m_pTrailer == NULL) {
// "Size" must be positive and at most 1 << 20 entries.
350 FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
351 if (xrefsize <= 0 || xrefsize > (1 << 20)) {
354 m_CrossRef.SetSize(xrefsize);
355 m_V5Type.SetSize(xrefsize);
// Parallel lists: table position and the matching "XRefStm" position.
356 CFX_FileSizeArray CrossRefList, XRefStreamList;
357 CrossRefList.Add(xrefpos);
358 XRefStreamList.Add(GetDirectInteger(m_pTrailer, FX_BSTRC("XRefStm")));
359 if (!CheckDirectType(m_pTrailer, FX_BSTRC("Prev"), PDFOBJ_NUMBER)) {
// A "Prev" that points back at the same table is detected here.
362 FX_FILESIZE newxrefpos = GetDirectInteger(m_pTrailer, FX_BSTRC("Prev"));
363 if (newxrefpos == xrefpos) {
366 xrefpos = newxrefpos;
// Older tables are inserted at the front so they are processed first.
368 CrossRefList.InsertAt(0, xrefpos);
369 LoadCrossRefV4(xrefpos, 0, TRUE, FALSE);
370 CPDF_Dictionary* pDict = LoadTrailerV4();
374 if (!CheckDirectType(pDict, FX_BSTRC("Prev"), PDFOBJ_NUMBER)) {
378 newxrefpos = GetDirectInteger(pDict, FX_BSTRC("Prev"));
379 if (newxrefpos == xrefpos) {
383 xrefpos = newxrefpos;
384 XRefStreamList.InsertAt(0, pDict->GetInteger(FX_BSTRC("XRefStm")));
385 m_Trailers.Add(pDict);
// Real load; bFirst is TRUE only for index 0.
387 for (FX_INT32 i = 0; i < CrossRefList.GetSize(); i ++)
388 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE, i == 0)) {
// Variant of LoadAllCrossRefV4 whose first table is read with
// LoadLinearizedCrossRefV4(xrefpos, dwObjCount); earlier tables are then
// collected through "Prev" and loaded with LoadCrossRefV4.
// NOTE(review): return statements, the loop over "Prev" and the handling of
// `xrefsize` are hidden in this listing.
393 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, FX_DWORD dwObjCount)
395 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) {
398 m_pTrailer = LoadTrailerV4();
399 if (m_pTrailer == NULL) {
402 FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
// Parallel lists: table position and the matching "XRefStm" position.
406 CFX_FileSizeArray CrossRefList, XRefStreamList;
407 CrossRefList.Add(xrefpos);
408 XRefStreamList.Add(GetDirectInteger(m_pTrailer, FX_BSTRC("XRefStm")));
409 xrefpos = GetDirectInteger(m_pTrailer, FX_BSTRC("Prev"));
// Older tables go to the front of both lists.
411 CrossRefList.InsertAt(0, xrefpos);
412 LoadCrossRefV4(xrefpos, 0, TRUE, FALSE);
413 CPDF_Dictionary* pDict = LoadTrailerV4();
417 xrefpos = GetDirectInteger(pDict, FX_BSTRC("Prev"));
418 XRefStreamList.InsertAt(0, pDict->GetInteger(FX_BSTRC("XRefStm")));
419 m_Trailers.Add(pDict);
// The loop starts at index 1, so `i == 0` is never TRUE here and the entry at
// index 0 is not loaded by this loop.
421 for (FX_INT32 i = 1; i < CrossRefList.GetSize(); i ++)
422 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE, i == 0)) {
// Reads `dwObjCount` fixed-width (20 byte) cross-reference records starting
// at file position `pos` (made relative to the header offset), filling
// m_CrossRef, m_V5Type, m_ObjVersion and m_SortedOffset for object numbers
// 0 .. dwObjCount-1.
// NOTE(review): the failure paths, the freeing of pBuf and the return values
// are hidden in this listing.
427 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount)
429 FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset;
430 m_Syntax.RestorePos(dwStartPos);
// Record the table position itself in the offset list if it is new.
431 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
432 if (pResult == NULL) {
433 m_SortedOffset.Add(pos);
435 FX_DWORD start_objnum = 0;
436 FX_DWORD count = dwObjCount;
437 FX_FILESIZE SavedPos = m_Syntax.SavePos();
// Records are read in blocks of up to 1024; the buffer is NUL-terminated so
// the atoi-style parsers below stop at its end.
438 FX_INT32 recordsize = 20;
439 char* pBuf = FX_Alloc(char, 1024 * recordsize + 1);
440 pBuf[1024 * recordsize] = '\0';
441 FX_INT32 nBlocks = count / 1024 + 1;
442 for (FX_INT32 block = 0; block < nBlocks; block ++) {
// The last block holds the remainder (possibly 0 records).
443 FX_INT32 block_size = block == nBlocks - 1 ? count % 1024 : 1024;
444 FX_DWORD dwReadSize = block_size * recordsize;
// NOTE(review): this compares against dwStartPos, not the current block's
// position — confirm that is intended.
445 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen) {
449 if (!m_Syntax.ReadBlock((FX_LPBYTE)pBuf, dwReadSize)) {
453 for (FX_INT32 i = 0; i < block_size; i ++) {
454 FX_DWORD objnum = start_objnum + block * 1024 + i;
455 char* pEntry = pBuf + i * recordsize;
// Byte 17 of a record is the entry kind; 'f' marks a free entry (type 0).
456 if (pEntry[17] == 'f') {
457 m_CrossRef.SetAtGrow(objnum, 0);
458 m_V5Type.SetAtGrow(objnum, 0);
// In-use entry: offset is parsed from the start of the record; the first 10
// characters are checked to be decimal digits (the guard and the action taken
// on a non-digit are hidden).
460 FX_INT32 offset = FXSYS_atoi(pEntry);
462 for (FX_INT32 c = 0; c < 10; c ++) {
463 if (pEntry[c] < '0' || pEntry[c] > '9') {
469 m_CrossRef.SetAtGrow(objnum, offset);
// The generation number starts at byte 11 of the record.
470 FX_INT32 version = FXSYS_atoi(pEntry + 11);
472 m_bVersionUpdated = TRUE;
474 m_ObjVersion.SetAtGrow(objnum, version);
// Only offsets inside the file are added to the offset list.
475 if (m_CrossRef[objnum] < m_Syntax.m_FileLen) {
476 FX_LPVOID pResult = FXSYS_bsearch(&m_CrossRef[objnum], m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
477 if (pResult == NULL) {
478 m_SortedOffset.Add(m_CrossRef[objnum]);
481 m_V5Type.SetAtGrow(objnum, 1);
// Leave the syntax parser just past the records that were consumed.
486 m_Syntax.RestorePos(SavedPos + count * recordsize);
// Parses one classic "xref" table located at `pos`.
//   streampos - position of an associated cross-reference stream; it is
//               recorded in m_SortedOffset and passed to LoadCrossRefV5 at
//               the end (both under guards hidden in this listing).
//   bSkip     - callers pass TRUE for a first pass that only needs to get
//               past the table; the test on bSkip is hidden here.
//   bFirst    - TRUE for the first table processed by the caller; its use is
//               hidden here (m_dwXrefStartObjNum is assigned below).
// NOTE(review): return statements, the subsection loop header, the freeing of
// pBuf and many guards are hidden in this listing.
489 FX_BOOL CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip, FX_BOOL bFirst)
491 m_Syntax.RestorePos(pos);
492 if (m_Syntax.GetKeyword() != FX_BSTRC("xref")) {
// Record the table position and the stream position in the offset list.
495 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
496 if (pResult == NULL) {
497 m_SortedOffset.Add(pos);
500 FX_LPVOID pResult = FXSYS_bsearch(&streampos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
501 if (pResult == NULL) {
502 m_SortedOffset.Add(streampos);
// Each subsection starts with "<first object number> <count>".
506 FX_FILESIZE SavedPos = m_Syntax.SavePos();
508 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
509 if (word.IsEmpty()) {
513 m_Syntax.RestorePos(SavedPos);
// Starting object numbers of 1 << 20 or more are rejected.
516 FX_DWORD start_objnum = FXSYS_atoi(word);
517 if (start_objnum >= (1 << 20)) {
520 FX_DWORD count = m_Syntax.GetDirectNum();
521 m_Syntax.ToNextWord();
522 SavedPos = m_Syntax.SavePos();
523 FX_BOOL bFirstItem = FALSE;
524 FX_INT32 recordsize = 20;
528 m_dwXrefStartObjNum = start_objnum;
// Records are 20 bytes each and are read in blocks of up to 1024.
530 char* pBuf = FX_Alloc(char, 1024 * recordsize + 1);
531 pBuf[1024 * recordsize] = '\0';
532 FX_INT32 nBlocks = count / 1024 + 1;
533 FX_BOOL bFirstBlock = TRUE;
534 for (FX_INT32 block = 0; block < nBlocks; block ++) {
535 FX_INT32 block_size = block == nBlocks - 1 ? count % 1024 : 1024;
// NOTE(review): the result of ReadBlock is not checked on this line.
536 m_Syntax.ReadBlock((FX_LPBYTE)pBuf, block_size * recordsize);
537 for (FX_INT32 i = 0; i < block_size; i ++) {
538 FX_DWORD objnum = start_objnum + block * 1024 + i;
539 char* pEntry = pBuf + i * recordsize;
// Byte 17 is the entry kind; 'f' marks a free entry.
540 if (pEntry[17] == 'f') {
// Special case tested for free entries: offset 0 with generation 65535 in a
// subsection that does not start at object 0 (the action taken is hidden).
546 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
547 FX_INT32 version = FXSYS_atoi(pEntry + 11);
548 if (offset == 0 && version == 65535 && start_objnum != 0) {
553 m_CrossRef.SetAtGrow(objnum, 0);
554 m_V5Type.SetAtGrow(objnum, 0);
// In-use entry: parse the offset and verify the 10 offset characters are
// digits (the action on a non-digit is hidden).
556 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
558 for (FX_INT32 c = 0; c < 10; c ++) {
559 if (pEntry[c] < '0' || pEntry[c] > '9') {
565 m_CrossRef.SetAtGrow(objnum, offset);
566 FX_INT32 version = FXSYS_atoi(pEntry + 11);
568 m_bVersionUpdated = TRUE;
570 m_ObjVersion.SetAtGrow(objnum, version);
// Only offsets inside the file are added to the offset list.
571 if (m_CrossRef[objnum] < m_Syntax.m_FileLen) {
572 FX_LPVOID pResult = FXSYS_bsearch(&m_CrossRef[objnum], m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
573 if (pResult == NULL) {
574 m_SortedOffset.Add(m_CrossRef[objnum]);
577 m_V5Type.SetAtGrow(objnum, 1);
// Step past the records of this subsection.
586 m_Syntax.RestorePos(SavedPos + count * recordsize);
// streampos is passed both as the position and as the `prev` in/out argument.
589 if (!LoadCrossRefV5(streampos, streampos, FALSE)) {
// Loads the cross-reference stream at `xrefpos` as the main one, then loads
// further streams as non-main. LoadCrossRefV5 overwrites its second argument
// with the stream's "Prev" value, so `xrefpos` advances on every call.
// Afterwards the object-stream cache is initialised and the file is flagged
// as using xref streams.
// NOTE(review): the loop header around the second call and the return
// statements are hidden in this listing.
594 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos)
596 if (!LoadCrossRefV5(xrefpos, xrefpos, TRUE)) {
600 if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
603 m_ObjectStreamMap.InitHashTable(101, FALSE);
604 m_bXRefStream = TRUE;
// Recovery path: discards the loaded cross-reference data and rebuilds it by
// scanning the file contents. The file is read in chunks into a 4096-byte
// buffer and each byte is fed through a hand-written state machine that
// recognises "<objnum> <gennum> obj" headers, the "trailer" and "xref"
// keywords, and "endobj". Recognised objects are entered into m_CrossRef,
// m_V5Type, m_ObjVersion and m_SortedOffset; recognised trailers are merged
// into or become m_pTrailer.
// NOTE(review): the state variable, the switch/case labels, most state
// transitions, the buffer release and the return statements are hidden in
// this listing, so the comments below only describe the visible statements.
607 FX_BOOL CPDF_Parser::RebuildCrossRef()
609 m_CrossRef.RemoveAll();
610 m_V5Type.RemoveAll();
611 m_SortedOffset.RemoveAll();
612 m_ObjVersion.RemoveAll();
614 m_pTrailer->Release();
// inside_index counts how many characters of the keyword currently being
// matched ("trailer", "xref", "endobj") have been seen.
618 FX_INT32 inside_index = 0;
619 FX_DWORD objnum, gennum;
621 FX_LPBYTE buffer = FX_Alloc(FX_BYTE, 4096);
622 FX_FILESIZE pos = m_Syntax.m_HeaderOffset;
623 FX_FILESIZE start_pos, start_pos1;
// Absolute positions of the last object header / xref / trailer seen.
624 FX_FILESIZE last_obj = -1, last_xref = -1, last_trailer = -1;
625 FX_BOOL bInUpdate = FALSE;
626 while (pos < m_Syntax.m_FileLen) {
627 FX_BOOL bOverFlow = FALSE;
// Bytes remaining; presumably clamped to the buffer size on a hidden line.
628 FX_DWORD size = (FX_DWORD)(m_Syntax.m_FileLen - pos);
632 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer, pos, size)) {
635 for (FX_DWORD i = 0; i < size; i ++) {
636 FX_BYTE byte = buffer[i];
637 FX_LPBYTE pData = buffer + i;
640 if (_PDF_CharType[byte] == 'W') {
643 if (byte <= '9' && byte >= '0') {
668 if (_PDF_CharType[byte] == 'W') {
670 } else if (byte <= '9' && byte >= '0') {
674 } else if (byte == 't') {
677 } else if (byte == 'x') {
// Accumulate the decimal object number digit by digit.
686 if (byte <= '9' && byte >= '0') {
687 objnum = objnum * 10 + byte - '0';
689 } else if (_PDF_CharType[byte] == 'W') {
698 if (byte <= '9' && byte >= '0') {
699 start_pos1 = pos + i;
702 } else if (_PDF_CharType[byte] == 'W') {
704 } else if (byte == 't') {
// Accumulate the decimal generation number digit by digit.
713 if (byte <= '9' && byte >= '0') {
714 gennum = gennum * 10 + byte - '0';
716 } else if (_PDF_CharType[byte] == 'W') {
727 } else if (_PDF_CharType[byte] == 'W') {
// Another number followed: the previous "second number" position becomes the
// candidate object start and this digit starts a new second number.
729 } else if (byte <= '9' && byte >= '0') {
732 start_pos = start_pos1;
733 start_pos1 = pos + i;
735 } else if (byte == 't') {
744 switch (inside_index) {
// Reached after the object keyword has been matched and is followed by a
// whitespace or delimiter character.
762 if (_PDF_CharType[byte] == 'W' || _PDF_CharType[byte] == 'D') {
// Object numbers above 0x1000000 are treated specially (action hidden).
763 if (objnum > 0x1000000) {
// obj_pos is relative to the header offset; last_obj stays absolute.
767 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;
768 last_obj = start_pos;
769 FX_LPVOID pResult = FXSYS_bsearch(&obj_pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
770 if (pResult == NULL) {
771 m_SortedOffset.Add(obj_pos);
// Parse the object strictly to learn its type and where it ends.
773 FX_FILESIZE obj_end = 0;
774 CPDF_Object *pObject = ParseIndirectObjectAtByStrict(m_pDocument, obj_pos, objnum, NULL, &obj_end);
776 int iType = pObject->GetType();
777 if (iType == PDFOBJ_STREAM) {
778 CPDF_Stream* pStream = (CPDF_Stream*)pObject;
779 CPDF_Dictionary* pDict = pStream->GetDict();
// A stream of Type "XRef" with a "Size" and a Root that has "Pages" is
// adopted as the trailer (a clone of its dictionary).
781 if (pDict->KeyExist(FX_BSTRC("Type"))) {
782 CFX_ByteString bsValue = pDict->GetString(FX_BSTRC("Type"));
783 if (bsValue == FX_BSTRC("XRef") && pDict->KeyExist(FX_BSTRC("Size"))) {
784 CPDF_Object* pRoot = pDict->GetElement(FX_BSTRC("Root"));
785 if (pRoot && pRoot->GetDict() && pRoot->GetDict()->GetElement(FX_BSTRC("Pages"))) {
787 m_pTrailer->Release();
789 m_pTrailer = (CPDF_Dictionary*)pDict->Clone();
796 FX_FILESIZE offset = 0;
797 m_Syntax.RestorePos(obj_pos);
798 offset = m_Syntax.FindTag(FX_BSTRC("obj"), 0);
// If the object body extends past the bytes left in this chunk, continue
// scanning from the object's end instead.
804 FX_FILESIZE nLen = obj_end - obj_pos - offset;
805 if ((FX_DWORD)nLen > size - i) {
806 pos = obj_end + m_Syntax.m_HeaderOffset;
// Object number already known: overwrite its position and generation.
811 if (m_CrossRef.GetSize() > (FX_INT32)objnum && m_CrossRef[objnum]) {
813 FX_DWORD oldgen = m_ObjVersion.GetAt(objnum);
814 m_CrossRef[objnum] = obj_pos;
815 m_ObjVersion.SetAt(objnum, (FX_SHORT)gennum);
816 if (oldgen != gennum) {
817 m_bVersionUpdated = TRUE;
// New object number: grow the tables and mark the entry as type 1.
821 m_CrossRef.SetAtGrow(objnum, obj_pos);
822 m_V5Type.SetAtGrow(objnum, 1);
823 m_ObjVersion.SetAtGrow(objnum, (FX_SHORT)gennum);
// All 7 characters of "trailer" matched and a separator follows.
835 if (inside_index == 7) {
836 if (_PDF_CharType[byte] == 'W' || _PDF_CharType[byte] == 'D') {
837 last_trailer = pos + i - 7;
838 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
839 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0);
// Only a dictionary, or a stream (whose dictionary is used), can be a trailer.
841 if (pObj->GetType() != PDFOBJ_DICTIONARY && pObj->GetType() != PDFOBJ_STREAM) {
844 CPDF_Dictionary* pTrailer = NULL;
845 if (pObj->GetType() == PDFOBJ_STREAM) {
846 pTrailer = ((CPDF_Stream*)pObj)->GetDict();
848 pTrailer = (CPDF_Dictionary*)pObj;
// When a trailer already exists (guard hidden), entries of the new one are
// copied into it if the new trailer has no Root, or its Root refers to an
// object already present in m_CrossRef.
852 CPDF_Object* pRoot = pTrailer->GetElement(FX_BSTRC("Root"));
853 if (pRoot == NULL || (pRoot->GetType() == PDFOBJ_REFERENCE &&
854 (FX_DWORD)m_CrossRef.GetSize() > ((CPDF_Reference*)pRoot)->GetRefObjNum() &&
855 m_CrossRef.GetAt(((CPDF_Reference*)pRoot)->GetRefObjNum()) != 0)) {
856 FX_POSITION pos = pTrailer->GetStartPos();
859 CPDF_Object* pObj = pTrailer->GetNextElement(pos, key);
860 m_pTrailer->SetAt(key, pObj->Clone(), m_pDocument);
// No trailer yet: adopt this one (cloned when it belongs to a stream).
867 if (pObj->GetType() == PDFOBJ_STREAM) {
868 m_pTrailer = (CPDF_Dictionary*)pTrailer->Clone();
871 m_pTrailer = pTrailer;
// Peek for a following "startxref <offset>" and remember the offset;
// Compare() returning 0 means the keyword matched.
873 FX_FILESIZE dwSavePos = m_Syntax.SavePos();
874 CFX_ByteString strWord = m_Syntax.GetKeyword();
875 if (!strWord.Compare(FX_BSTRC("startxref"))) {
876 FX_BOOL bNumber = FALSE;
877 CFX_ByteString bsOffset = m_Syntax.GetNextWord(bNumber);
879 m_LastXRefOffset = FXSYS_atoi(bsOffset);
882 m_Syntax.RestorePos(dwSavePos);
// Keyword matching: compare the byte with the next expected character.
893 } else if (byte == "trailer"[inside_index]) {
901 if (inside_index == 4) {
902 last_xref = pos + i - 4;
904 } else if (byte == "xref"[inside_index]) {
912 if (byte == '\r' || byte == '\n') {
921 } else if (byte == '(') {
929 if (byte == '<' && inside_index == 1) {
931 } else if (byte == '>') {
941 if (_PDF_CharType[byte] == 'D' || _PDF_CharType[byte] == 'W') {
947 if (_PDF_CharType[byte] == 'W') {
949 } else if (byte == '%' || byte == '(' || byte == '<' || byte == '\\') {
952 } else if (inside_index == 6) {
955 } else if (byte == "endobj"[inside_index]) {
// After the scan: choose the position that bounds the last object — the last
// "xref" if it follows the last object, otherwise the end of file when no
// trailer was seen or the last xref precedes the last object — and record it
// in the offset list.
967 if (last_xref != -1 && last_xref > last_obj) {
968 last_trailer = last_xref;
969 } else if (last_trailer == -1 || last_xref < last_obj) {
970 last_trailer = m_Syntax.m_FileLen;
972 FX_FILESIZE offset = last_trailer - m_Syntax.m_HeaderOffset;
973 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
974 if (pResult == NULL) {
975 m_SortedOffset.Add(offset);
// Decodes an `n`-byte big-endian unsigned integer starting at `p`: each byte
// is folded in as result = result * 256 + byte.
// The declaration of `result` and the return statement are hidden in this
// listing (presumably it starts at 0 and is returned).
980 static FX_DWORD _GetVarInt(FX_LPCBYTE p, FX_INT32 n)
983 for (FX_INT32 i = 0; i < n; i ++) {
984 result = result * 256 + p[i];
// Loads one cross-reference stream located at `pos`.
//   prev      - in/out: overwritten with the stream dictionary's "Prev" value.
//   bMainXRef - TRUE for the newest stream; the branch selecting between the
//               "main" handling (trailer clone + table sizing) and the
//               m_Trailers.Add path is hidden in this listing.
// The stream's "Index" array gives (start, count) pairs of object numbers and
// its "W" array gives the byte widths of the fields in every entry.
// NOTE(review): return statements and many guards are hidden in this listing.
988 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE pos, FX_FILESIZE& prev, FX_BOOL bMainXRef)
// NOTE(review): the result is cast to CPDF_Stream* before its type is checked
// a few lines below.
990 CPDF_Stream* pStream = (CPDF_Stream*)ParseIndirectObjectAt(m_pDocument, pos, 0, NULL);
995 m_pDocument->InsertIndirectObject(pStream->m_ObjNum, pStream);
997 if (pStream->GetType() != PDFOBJ_STREAM) {
1000 prev = pStream->GetDict()->GetInteger(FX_BSTRC("Prev"));
1001 FX_INT32 size = pStream->GetDict()->GetInteger(FX_BSTRC("Size"));
// Main stream: its dictionary becomes the trailer and the tables are sized
// from "Size"; m_V5Type is zero-filled when resizing succeeds.
1007 m_pTrailer = (CPDF_Dictionary*)pStream->GetDict()->Clone();
1008 m_CrossRef.SetSize(size);
1009 if (m_V5Type.SetSize(size)) {
1010 FXSYS_memset32(m_V5Type.GetData(), 0, size);
// Other streams: keep a clone of the dictionary as an additional trailer.
1013 m_Trailers.Add((CPDF_Dictionary*)pStream->GetDict()->Clone());
1015 CFX_DWordArray IndexArray, WidthArray;
// Without an "Index" array a single segment ending with `size` is used
// (the first element added is on a hidden line).
1017 CPDF_Array* pArray = pStream->GetDict()->GetArray(FX_BSTRC("Index"));
1018 if (pArray == NULL) {
1020 IndexArray.Add(size);
1023 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
1024 IndexArray.Add(pArray->GetInteger(i));
1026 nSegs = pArray->GetCount() / 2;
1028 pArray = pStream->GetDict()->GetArray(FX_BSTRC("W"));
1029 if (pArray == NULL) {
// Sum the field widths, guarding against unsigned wrap-around.
1033 FX_DWORD totalwidth = 0;
1035 for (i = 0; i < pArray->GetCount(); i ++) {
1036 WidthArray.Add(pArray->GetInteger(i));
1037 if (totalwidth + WidthArray[i] < totalwidth) {
1041 totalwidth += WidthArray[i];
// At least three width fields and a non-zero entry size are required.
1043 if (totalwidth == 0 || WidthArray.GetSize() < 3) {
1048 acc.LoadAllData(pStream);
1049 FX_LPCBYTE pData = acc.GetData();
1050 FX_DWORD dwTotalSize = acc.GetSize();
// segindex: number of entries consumed from the stream data so far
// (presumably advanced on a hidden line).
1051 FX_DWORD segindex = 0;
1052 for (i = 0; i < nSegs; i ++) {
1053 FX_INT32 startnum = IndexArray[i * 2];
1057 m_dwXrefStartObjNum = startnum;
1058 FX_DWORD count = IndexArray[i * 2 + 1];
// Reject segments whose entries would overflow the arithmetic or extend past
// the decoded stream data.
1059 if (segindex + count < segindex || segindex + count == 0 ||
1060 (FX_DWORD)totalwidth >= UINT_MAX / (segindex + count) || (segindex + count) * (FX_DWORD)totalwidth > dwTotalSize) {
1063 FX_LPCBYTE segstart = pData + segindex * (FX_DWORD)totalwidth;
// Reject segments whose object numbers fall outside m_V5Type.
1064 if ((FX_DWORD)startnum + count < (FX_DWORD)startnum ||
1065 (FX_DWORD)startnum + count > (FX_DWORD)m_V5Type.GetSize()) {
1068 for (FX_DWORD j = 0; j < count; j ++) {
1070 FX_LPCBYTE entrystart = segstart + j * totalwidth;
// Field 0 is the entry type; it is only read when its width is non-zero
// (the default value of `type` is on a hidden line).
1071 if (WidthArray[0]) {
1072 type = _GetVarInt(entrystart, WidthArray[0]);
// 255 is an internal marker set further below on object numbers that act as
// object streams; such entries receive their file offset here.
1074 if (m_V5Type[startnum + j] == 255) {
1075 FX_FILESIZE offset = _GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1076 m_CrossRef[startnum + j] = offset;
1077 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1078 if (pResult == NULL) {
1079 m_SortedOffset.Add(offset);
// Entries that already have a type are tested here (action hidden).
1083 if (m_V5Type[startnum + j]) {
1086 m_V5Type[startnum + j] = type;
1088 m_CrossRef[startnum + j] = 0;
// Field 1 is stored in m_CrossRef for this entry.
1090 FX_FILESIZE offset = _GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1091 m_CrossRef[startnum + j] = offset;
1093 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1094 if (pResult == NULL) {
1095 m_SortedOffset.Add(offset);
// Here field 1 is used as an object number: it is range-checked and that
// object is marked with 255 (the branch conditions selecting this path are
// hidden in this listing).
1098 if (offset < 0 || offset >= m_V5Type.GetSize()) {
1102 m_V5Type[offset] = 255;
// Returns the trailer's "ID" entry as an array. When the entry is a
// reference it is resolved with ParseIndirectObject and the resolved object
// is stored back into the trailer under "ID".
// The results for a missing entry or a non-array object are hidden in this
// listing (presumably NULL).
// NOTE(review): m_pTrailer is dereferenced without a visible null check.
1111 CPDF_Array* CPDF_Parser::GetIDArray()
1113 CPDF_Object* pID = m_pTrailer->GetElement(FX_BSTRC("ID"));
1117 if (pID->GetType() == PDFOBJ_REFERENCE) {
1118 pID = ParseIndirectObject(NULL, ((CPDF_Reference*)pID)->GetRefObjNum());
1119 m_pTrailer->SetAt(FX_BSTRC("ID"), pID);
1121 if (pID == NULL || pID->GetType() != PDFOBJ_ARRAY) {
1124 return (CPDF_Array*)pID;
// Returns the object number referenced by the trailer's "Root" entry.
// The result when the entry is missing or is not a reference is hidden in
// this listing; StartParse treats a result of 0 as "no root".
// NOTE(review): m_pTrailer is dereferenced without a visible null check.
1126 FX_DWORD CPDF_Parser::GetRootObjNum()
// The cast happens before the type check on the next line.
1128 CPDF_Reference* pRef = (CPDF_Reference*)m_pTrailer->GetElement(FX_BSTRC("Root"));
1129 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
1132 return pRef->GetRefObjNum();
// Returns the object number referenced by the trailer's "Info" entry.
// The result when the entry is missing or is not a reference is hidden in
// this listing (presumably 0, mirroring GetRootObjNum).
// NOTE(review): m_pTrailer is dereferenced without a visible null check.
1134 FX_DWORD CPDF_Parser::GetInfoObjNum()
// The cast happens before the type check on the next line.
1136 CPDF_Reference* pRef = (CPDF_Reference*)m_pTrailer->GetElement(FX_BSTRC("Info"));
1137 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
1140 return pRef->GetRefObjNum();
// Cheaply tests whether object `objnum` looks like a form stream without
// parsing it: the raw bytes between the object's offset and the next known
// offset are searched for the words "/Form" and "stream".
//   bForm - out: set from the search result (TRUE when SearchMultiWord
//           returns 0).
// NOTE(review): the return values and the initial value of bForm are hidden
// in this listing.
1142 FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm)
1145 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
// Type 0 (free) and type 2 (entry pointing at another object) are handled
// before any file access; their results are hidden.
1148 if (m_V5Type[objnum] == 0) {
1151 if (m_V5Type[objnum] == 2) {
1154 FX_FILESIZE pos = m_CrossRef[objnum];
1155 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1156 if (pResult == NULL) {
// The last sorted offset has no successor to measure against.
1159 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() == m_SortedOffset.GetSize() - 1) {
// Object size = next known offset minus this one.
1162 FX_FILESIZE size = ((FX_FILESIZE*)pResult)[1] - pos;
// Search within the object, then put the parser position back.
1163 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1164 m_Syntax.RestorePos(pos);
1165 bForm = m_Syntax.SearchMultiWord(FX_BSTRC("/Form\0stream"), TRUE, size) == 0;
1166 m_Syntax.RestorePos(SavedPos);
// Parses and returns indirect object `objnum`.
// Entries of type 1 or 255 are parsed straight from the file at the offset in
// m_CrossRef. Entries of type 2 live inside an object stream: the stream is
// fetched with GetObjectStream, its header of (object number, offset) pairs
// is scanned, and the matching object is parsed at "First" + offset.
// NOTE(review): the return statements, the loop over the header pairs and
// the guard on `pos` are hidden in this listing.
1169 CPDF_Object* CPDF_Parser::ParseIndirectObject(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, PARSE_CONTEXT* pContext)
1171 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1174 if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
1175 FX_FILESIZE pos = m_CrossRef[objnum];
1179 return ParseIndirectObjectAt(pObjList, pos, objnum, pContext);
1181 if (m_V5Type[objnum] == 2) {
// For type 2 entries m_CrossRef holds the object number of the container.
1182 CPDF_StreamAcc* pObjStream = GetObjectStream((FX_DWORD)m_CrossRef[objnum]);
1183 if (pObjStream == NULL) {
// "N" and "First" are read from the object stream's dictionary.
1186 FX_INT32 n = pObjStream->GetDict()->GetInteger(FX_BSTRC("N"));
1187 FX_INT32 offset = pObjStream->GetDict()->GetInteger(FX_BSTRC("First"));
// Parse the decoded stream data through a private in-memory syntax parser.
1188 CPDF_SyntaxParser syntax;
1189 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream((FX_LPBYTE)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
1190 syntax.InitParser((IFX_FileStream*)file, 0);
1191 CPDF_Object* pRet = NULL;
1193 FX_DWORD thisnum = syntax.GetDirectNum();
1194 FX_DWORD thisoff = syntax.GetDirectNum();
1195 if (thisnum == objnum) {
1196 syntax.RestorePos(offset + thisoff);
1197 pRet = syntax.GetObject(pObjList, 0, 0, 0, pContext);
// Returns a stream accessor with the loaded data of object stream `objnum`,
// creating it on first use and caching it in m_ObjectStreamMap keyed by the
// object number.
// The return statements are hidden in this listing (presumably the cached or
// new accessor, or NULL when the object is not a stream).
1206 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum)
1208 CPDF_StreamAcc* pStreamAcc = NULL;
// Cache hit.
1209 if (m_ObjectStreamMap.Lookup((void*)(FX_UINTPTR)objnum, (void*&)pStreamAcc)) {
// Cache miss: fetch the object; the cast precedes the type check below.
1212 const CPDF_Stream* pStream = (CPDF_Stream*)m_pDocument->GetIndirectObject(objnum);
1213 if (pStream == NULL || pStream->GetType() != PDFOBJ_STREAM) {
1216 pStreamAcc = FX_NEW CPDF_StreamAcc;
1217 pStreamAcc->LoadAllData(pStream);
1218 m_ObjectStreamMap.SetAt((void*)(FX_UINTPTR)objnum, pStreamAcc);
// Returns the number of bytes object `objnum` occupies in the file, measured
// as the distance from its offset to the next entry in m_SortedOffset.
// For a type 2 entry the size of its containing object (whose number is kept
// in m_CrossRef) is measured instead.
// The results on the failure paths are hidden in this listing (presumably 0).
1221 FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum)
1223 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1226 if (m_V5Type[objnum] == 2) {
// NOTE(review): the replaced objnum is used as an index below with no
// visible re-check against m_CrossRef.GetSize() — confirm it is in range.
1227 objnum = (FX_DWORD)m_CrossRef[objnum];
1229 if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
1230 FX_FILESIZE offset = m_CrossRef[objnum];
1234 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1235 if (pResult == NULL) {
// The last sorted offset has no successor to measure against.
1238 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() == m_SortedOffset.GetSize() - 1) {
1241 return ((FX_FILESIZE*)pResult)[1] - offset;
// Copies the raw bytes of object `objnum` into a newly allocated buffer.
//   pBuffer - out: buffer allocated with FX_Alloc holding the object's bytes.
//   size    - out: number of bytes in pBuffer.
// Type 2 entries are copied out of their object stream's decoded data; type 1
// entries are copied straight from the file.
// NOTE(review): the initial values of pBuffer/size, the loop over the object
// stream header, several guards and the early returns are hidden in this
// listing.
1245 void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, FX_LPBYTE& pBuffer, FX_DWORD& size)
1249 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1252 if (m_V5Type[objnum] == 2) {
1253 CPDF_StreamAcc* pObjStream = GetObjectStream((FX_DWORD)m_CrossRef[objnum]);
1254 if (pObjStream == NULL) {
1257 FX_INT32 n = pObjStream->GetDict()->GetInteger(FX_BSTRC("N"));
1258 FX_INT32 offset = pObjStream->GetDict()->GetInteger(FX_BSTRC("First"));
1259 CPDF_SyntaxParser syntax;
1260 FX_LPCBYTE pData = pObjStream->GetData();
1261 FX_DWORD totalsize = pObjStream->GetSize();
1262 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream((FX_LPBYTE)pData, (size_t)totalsize, FALSE));
1263 syntax.InitParser((IFX_FileStream*)file, 0);
// Scan the (object number, offset) header pairs for this object.
1265 FX_DWORD thisnum = syntax.GetDirectNum();
1266 FX_DWORD thisoff = syntax.GetDirectNum();
1267 if (thisnum == objnum) {
// Size is either "to the end of the stream data" or "up to the next
// object's offset"; the condition choosing between them is hidden.
1269 size = totalsize - (thisoff + offset);
1271 FX_DWORD nextnum = syntax.GetDirectNum();
1272 FX_DWORD nextoff = syntax.GetDirectNum();
1273 size = nextoff - thisoff;
// NOTE(review): no validation of `size` or of `thisoff + offset` against
// totalsize is visible before the allocation and copy — confirm the values
// taken from the stream cannot run past pData.
1275 pBuffer = FX_Alloc(FX_BYTE, size);
1276 FXSYS_memcpy32(pBuffer, pData + thisoff + offset, size);
1283 if (m_V5Type[objnum] == 1) {
1284 FX_FILESIZE pos = m_CrossRef[objnum];
// Verify that "<objnum> <gennum> obj" really starts at pos; every failure
// path restores the saved parser position.
1288 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1289 m_Syntax.RestorePos(pos);
1291 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1293 m_Syntax.RestorePos(SavedPos);
1296 FX_DWORD real_objnum = FXSYS_atoi(word);
1297 if (real_objnum && real_objnum != objnum) {
1298 m_Syntax.RestorePos(SavedPos);
1301 word = m_Syntax.GetNextWord(bIsNumber);
1303 m_Syntax.RestorePos(SavedPos);
1306 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1307 m_Syntax.RestorePos(SavedPos);
1310 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1311 if (pResult == NULL) {
1312 m_Syntax.RestorePos(SavedPos);
// NOTE(review): reads the element after the match; unlike IsFormStream and
// GetObjectSize, no "is this the last element" check is visible here.
1315 FX_FILESIZE nextoff = ((FX_FILESIZE*)pResult)[1];
// The next offset is trusted only if "xref" or "<num> <num> obj" starts there.
1316 FX_BOOL bNextOffValid = FALSE;
1317 if (nextoff != pos) {
1318 m_Syntax.RestorePos(nextoff);
1319 word = m_Syntax.GetNextWord(bIsNumber);
1320 if (word == FX_BSTRC("xref")) {
1321 bNextOffValid = TRUE;
1322 } else if (bIsNumber) {
1323 word = m_Syntax.GetNextWord(bIsNumber);
1324 if (bIsNumber && m_Syntax.GetKeyword() == FX_BSTRC("obj")) {
1325 bNextOffValid = TRUE;
// Otherwise scan forward from pos for "endobj" (or end of file) and use the
// position reached as the end of the object.
1329 if (!bNextOffValid) {
1330 m_Syntax.RestorePos(pos);
1332 if (m_Syntax.GetKeyword() == FX_BSTRC("endobj")) {
1335 if (m_Syntax.SavePos() == m_Syntax.m_FileLen) {
1339 nextoff = m_Syntax.SavePos();
// Copy [pos, nextoff) out of the file and restore the parser position.
1341 size = (FX_DWORD)(nextoff - pos);
1342 pBuffer = FX_Alloc(FX_BYTE, size);
1343 m_Syntax.RestorePos(pos);
1344 m_Syntax.ReadBlock(pBuffer, size);
1345 m_Syntax.RestorePos(SavedPos);
// Parses the indirect object that starts at file position `pos`.
//   objnum - expected object number; 0 means "accept whatever number is
//            found", in which case the parsed number is stored on the object.
// Expects "<objnum> <gennum> obj" at pos, parses the value with
// m_Syntax.GetObject, and restores the syntax parser's previous position
// before returning.
// NOTE(review): the guards on bIsNumber and the return statements are hidden
// in this listing (presumably NULL on the failure paths, pObj at the end).
1348 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(CPDF_IndirectObjects* pObjList, FX_FILESIZE pos, FX_DWORD objnum,
1349 PARSE_CONTEXT* pContext)
1351 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1352 m_Syntax.RestorePos(pos);
1354 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1356 m_Syntax.RestorePos(SavedPos);
// objOffset: where the object number token begins.
1359 FX_FILESIZE objOffset = m_Syntax.SavePos();
1360 objOffset -= word.GetLength();
1361 FX_DWORD real_objnum = FXSYS_atoi(word);
1362 if (objnum && real_objnum != objnum) {
1363 m_Syntax.RestorePos(SavedPos);
1366 word = m_Syntax.GetNextWord(bIsNumber);
1368 m_Syntax.RestorePos(SavedPos);
1371 FX_DWORD gennum = FXSYS_atoi(word);
1372 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1373 m_Syntax.RestorePos(SavedPos);
1376 CPDF_Object* pObj = m_Syntax.GetObject(pObjList, objnum, gennum, 0, pContext);
// The end offset includes a trailing "endobj" keyword when one is present.
1377 FX_FILESIZE endOffset = m_Syntax.SavePos();
1378 CFX_ByteString bsWord = m_Syntax.GetKeyword();
1379 if (bsWord == FX_BSTRC("endobj")) {
1380 endOffset = m_Syntax.SavePos();
// objSize is computed but not used in the visible statements.
1382 FX_DWORD objSize = endOffset - objOffset;
1383 m_Syntax.RestorePos(SavedPos);
1384 if (pObj && !objnum) {
1385 pObj->m_ObjNum = real_objnum;
// Variant of ParseIndirectObjectAt that parses the object body with
// m_Syntax.GetObjectByStrict() and reports where parsing stopped.
//   pObjList   - forwarded to GetObjectByStrict().
//   pos        - position of the "<objnum> <gennum> obj" header.
//   objnum     - expected object number; when non-zero it must equal the
//                number read from the file.
//   pContext   - forwarded to GetObjectByStrict().
//   pResultPos - receives m_Syntax.m_Pos after the object was parsed.
//                NOTE(review): any NULL guard around the write is not visible
//                in this listing - confirm.
// The parser position saved on entry is restored on every visible path.
1389 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(CPDF_IndirectObjects* pObjList, FX_FILESIZE pos, FX_DWORD objnum,
1390 struct PARSE_CONTEXT* pContext, FX_FILESIZE *pResultPos)
1392 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1393 m_Syntax.RestorePos(pos);
1395 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1397 m_Syntax.RestorePos(SavedPos);
1400 FX_DWORD real_objnum = FXSYS_atoi(word);
// A non-zero expected number that disagrees with the file is a failure.
1401 if (objnum && real_objnum != objnum) {
1402 m_Syntax.RestorePos(SavedPos);
1405 word = m_Syntax.GetNextWord(bIsNumber);
1407 m_Syntax.RestorePos(SavedPos);
1410 FX_DWORD gennum = FXSYS_atoi(word);
// The two numbers must be followed by the "obj" keyword.
1411 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1412 m_Syntax.RestorePos(SavedPos);
1415 CPDF_Object* pObj = m_Syntax.GetObjectByStrict(pObjList, objnum, gennum, 0, pContext);
1417 *pResultPos = m_Syntax.m_Pos;
1419 m_Syntax.RestorePos(SavedPos);
// Reads a trailer at the current syntax-parser position: expects the keyword
// "trailer", then parses the following object with GetObject() and returns it
// as a CPDF_Dictionary.
// The object must exist and have type PDFOBJ_DICTIONARY.
// NOTE(review): the failure-path bodies are not visible in this listing.
1422 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4()
1424 if (m_Syntax.GetKeyword() != FX_BSTRC("trailer")) {
1427 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0);
1428 if (pObj == NULL || pObj->GetType() != PDFOBJ_DICTIONARY) {
1434 return (CPDF_Dictionary*)pObj;
// Returns the permission bit mask for the document.
//   bCheckRevision - when TRUE and the encrypt dictionary's "R" entry is 2,
//                    bits 8-11 are cleared from the result.
// Without a security handler every bit is set ((FX_DWORD)-1). Otherwise the
// handler's mask is used and, for the "Standard" filter, normalised below.
1436 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision)
1438 if (m_pSecurityHandler == NULL) {
1439 return (FX_DWORD) - 1;
1441 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();
1442 if (m_pEncryptDict && m_pEncryptDict->GetString(FX_BSTRC("Filter")) == FX_BSTRC("Standard")) {
// Clear bits 0-1, then force bits 6-7 and 12-31 on.
1443 dwPermission &= 0xFFFFFFFC;
1444 dwPermission |= 0xFFFFF0C0;
1445 if(bCheckRevision && m_pEncryptDict->GetInteger(FX_BSTRC("R")) == 2) {
// Revision 2: clear bits 8-11.
1446 dwPermission &= 0xFFFFF0FF;
1449 return dwPermission;
// Returns TRUE when no security handler is installed; otherwise returns the
// handler's IsOwner() result.
1451 FX_BOOL CPDF_Parser::IsOwner()
1453 return m_pSecurityHandler == NULL ? TRUE : m_pSecurityHandler->IsOwner();
// Installs pSecurityHandler as the parser's security handler and creates the
// matching crypto handler for the syntax parser.
//   pSecurityHandler - handler to install; also used to create and Init() the
//                      crypto handler.
//   bForced          - stored in m_bForceUseSecurityHandler.
// The ASSERT documents the expectation that no handler is installed yet. When
// one is present and was not forced, it is deleted before being replaced.
// NOTE(review): the body of the `if (m_bForceUseSecurityHandler)` branch is
// not visible in this listing, and the result of CreateCryptoHandler() is used
// without a visible NULL check - confirm.
1455 void CPDF_Parser::SetSecurityHandler(CPDF_SecurityHandler* pSecurityHandler, FX_BOOL bForced)
1457 ASSERT(m_pSecurityHandler == NULL);
1458 if (m_pSecurityHandler && !m_bForceUseSecurityHandler) {
1459 delete m_pSecurityHandler;
1460 m_pSecurityHandler = NULL;
1462 m_bForceUseSecurityHandler = bForced;
1463 m_pSecurityHandler = pSecurityHandler;
1464 if (m_bForceUseSecurityHandler) {
1467 m_Syntax.m_pCryptoHandler = pSecurityHandler->CreateCryptoHandler();
1468 m_Syntax.m_pCryptoHandler->Init(NULL, pSecurityHandler);
// Decides whether the file starts with a linearization parameter dictionary
// and, if so, records its parameters.
//   pFileAccess - file to inspect; the syntax parser is (re)initialised on it.
//   offset      - header offset handed to m_Syntax.InitParser().
// Parsing starts 9 bytes past the header offset and expects
// "<objnum> <gennum> obj" followed by an object. When that object's
// dictionary has a "Linearized" entry, "L" is compared with the file size,
// "P" (if a number) is stored in m_dwFirstPageNo and "T" (if a number) in
// m_LastXRefOffset. Otherwise m_pLinearized is released and reset to NULL.
// NOTE(review): the return statements are not visible in this listing.
1470 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset)
1472 m_Syntax.InitParser(pFileAccess, offset);
1473 m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9);
1474 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1476 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1480 FX_DWORD objnum = FXSYS_atoi(word);
1481 word = m_Syntax.GetNextWord(bIsNumber);
1485 FX_DWORD gennum = FXSYS_atoi(word);
1486 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1487 m_Syntax.RestorePos(SavedPos);
1490 m_pLinearized = m_Syntax.GetObject(NULL, objnum, gennum, 0);
1491 if (!m_pLinearized) {
// The first object comes from untrusted input and need not be a dictionary.
// GetDict() is tested before use so that a non-dictionary object (for which
// GetDict() presumably yields NULL) falls through to the release path below
// instead of being dereferenced.
1494 if (m_pLinearized->GetDict() && m_pLinearized->GetDict()->GetElement(FX_BSTRC("Linearized"))) {
1495 m_Syntax.GetNextWord(bIsNumber);
1496 CPDF_Object *pLen = m_pLinearized->GetDict()->GetElement(FX_BSTRC("L"));
1498 m_pLinearized->Release();
// "L" must match the actual file size.
1501 if (pLen->GetInteger() != (int)pFileAccess->GetSize()) {
1504 CPDF_Object *pNo = m_pLinearized->GetDict()->GetElement(FX_BSTRC("P"));
1505 if (pNo && pNo->GetType() == PDFOBJ_NUMBER) {
1506 m_dwFirstPageNo = pNo->GetInteger();
1508 CPDF_Object *pTable = m_pLinearized->GetDict()->GetElement(FX_BSTRC("T"));
1509 if (pTable && pTable->GetType() == PDFOBJ_NUMBER) {
1510 m_LastXRefOffset = pTable->GetInteger();
// Not a linearization dictionary: drop the parsed object.
1514 m_pLinearized->Release();
1515 m_pLinearized = NULL;
// Starts parsing a file that may be linearized.
//   pFileAccess  - file to parse.
//   bReParse     - forwarded to CloseParser() and, for non-linearized files,
//                  to StartParse().
//   bOwnFileRead - stored in m_bOwnFileRead.
// Returns a PDFPARSE_ERROR_* code; PDFPARSE_ERROR_SUCCESS at the end of the
// visible success path.
// Non-linearized files are delegated to StartParse(). For linearized files
// the first cross-reference section is loaded from the position right after
// the linearization dictionary (V4 table first, then V5 stream), falling back
// to RebuildCrossRef() when both fail.
// NOTE(review): several guards and early returns are not visible in this
// listing; the inline comments describe only the visible statements.
1518 FX_DWORD CPDF_Parser::StartAsynParse(IFX_FileRead* pFileAccess, FX_BOOL bReParse, FX_BOOL bOwnFileRead)
1520 CloseParser(bReParse);
1521 m_bXRefStream = FALSE;
1522 m_LastXRefOffset = 0;
1523 m_bOwnFileRead = bOwnFileRead;
1524 FX_INT32 offset = GetHeaderOffset(pFileAccess);
1526 return PDFPARSE_ERROR_FORMAT;
1528 if (!IsLinearizedFile(pFileAccess, offset)) {
// Clear the file access pointer set during the linearization probe before
// handing over to the regular parse path.
1529 m_Syntax.m_pFileAccess = NULL;
1530 return StartParse(pFileAccess, bReParse, bOwnFileRead);
1533 m_pDocument = FX_NEW CPDF_Document(this);
1535 FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos();
1536 FX_BOOL bXRefRebuilt = FALSE;
1537 FX_BOOL bLoadV4 = FALSE;
// Try a V4 table, then a V5 stream; rebuild the cross-reference as a last
// resort.
1538 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE, FALSE)) && !LoadCrossRefV5(dwFirstXRefOffset, dwFirstXRefOffset, TRUE)) {
1539 if (!RebuildCrossRef()) {
1540 return PDFPARSE_ERROR_FORMAT;
1542 bXRefRebuilt = TRUE;
1543 m_LastXRefOffset = 0;
1546 m_pTrailer = LoadTrailerV4();
1547 if (m_pTrailer == NULL) {
// Size the cross-reference arrays from the trailer's "Size" entry.
1550 FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
1551 if (xrefsize == 0) {
1554 m_CrossRef.SetSize(xrefsize);
1555 m_V5Type.SetSize(xrefsize);
1557 FX_DWORD dwRet = SetEncryptHandler();
1558 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1561 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1562 if (m_pDocument->GetRoot() == NULL || m_pDocument->GetPageCount() == 0) {
1564 return PDFPARSE_ERROR_FORMAT;
// Retry after rebuilding the cross-reference: the encrypt handler is released
// and set up again around the rebuild.
1566 ReleaseEncryptHandler();
1567 if (!RebuildCrossRef()) {
1568 return PDFPARSE_ERROR_FORMAT;
1570 dwRet = SetEncryptHandler();
1571 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1574 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1575 if (m_pDocument->GetRoot() == NULL) {
1576 return PDFPARSE_ERROR_FORMAT;
// m_SortedOffset holds FX_FILESIZE elements, sorted with _CompareFileSize.
1579 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1580 FX_DWORD RootObjNum = GetRootObjNum();
1581 if (RootObjNum == 0) {
1582 ReleaseEncryptHandler();
1584 RootObjNum = GetRootObjNum();
1585 if (RootObjNum == 0) {
1586 return PDFPARSE_ERROR_FORMAT;
1588 dwRet = SetEncryptHandler();
1589 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
// Record the object number of the root's "Metadata" reference in the syntax
// parser when the handler reports IsMetadataEncrypted().
1593 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1594 CPDF_Reference* pMetadata = (CPDF_Reference*)m_pDocument->GetRoot()->GetElement(FX_BSTRC("Metadata"));
1595 if (pMetadata && pMetadata->GetType() == PDFOBJ_REFERENCE) {
1596 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
1599 return PDFPARSE_ERROR_SUCCESS;
// Loads the V5 (stream) cross-reference data starting at xrefpos by calling
// LoadCrossRefV5(), then initialises m_ObjectStreamMap's hash table with 101
// buckets and marks the document as using cross-reference streams.
// NOTE(review): the loop/condition around the second LoadCrossRefV5() call
// and the return statements are not visible in this listing.
1601 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)
1603 if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
1607 if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
1610 m_ObjectStreamMap.InitHashTable(101, FALSE);
1611 m_bXRefStream = TRUE;
// Loads the main cross-reference table of a linearized file, located at
// m_LastXRefOffset.
// Returns PDFPARSE_ERROR_SUCCESS, or PDFPARSE_ERROR_FORMAT when neither the V4
// nor the V5 loader succeeds (m_LastXRefOffset is then reset to 0).
// m_Syntax.m_MetadataObjnum is cleared for the duration of the call and
// restored on both visible exit paths.
1614 FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable()
1616 FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum;
1617 m_Syntax.m_MetadataObjnum = 0;
1619 m_pTrailer->Release();
1622 m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset);
1623 FX_FILESIZE dwSavedPos = m_Syntax.SavePos();
// Skip whitespace ('W' in _PDF_CharType) in front of the table; the count of
// skipped characters is added to m_LastXRefOffset below.
1625 FX_DWORD dwCount = 0;
1626 m_Syntax.GetNextChar(ch);
1627 FX_INT32 type = _PDF_CharType[ch];
1628 while (type == 'W') {
// NOTE(review): the body of this bounds test is not visible in this listing;
// confirm the direction of the comparison against the intended stop condition.
1630 if (m_Syntax.m_FileLen >= (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) {
1633 m_Syntax.GetNextChar(ch);
1634 type = _PDF_CharType[ch];
1636 m_LastXRefOffset += dwCount;
// Drop every cached object stream before reloading the cross-reference data.
1637 FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
1640 CPDF_StreamAcc* pStream;
1641 m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
1644 m_ObjectStreamMap.RemoveAll();
1645 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1646 m_LastXRefOffset = 0;
1647 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
1648 return PDFPARSE_ERROR_FORMAT;
// m_SortedOffset stores FX_FILESIZE elements (it is sorted and searched that
// way elsewhere in this file), so the element size and comparator must be the
// FX_FILESIZE ones; sorting it as FX_DWORD mis-orders the array whenever the
// two types differ in width.
1650 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1651 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
1652 return PDFPARSE_ERROR_SUCCESS;
// Constructs a syntax parser with no file and no crypto handler attached.
// The buffer size is taken from the module manager's m_FileBufSize, and
// m_bFileStream depends on whether _FPDFAPI_MINI_ is defined.
1654 CPDF_SyntaxParser::CPDF_SyntaxParser()
1656 m_pFileAccess = NULL;
1657 m_pCryptoHandler = NULL;
1659 m_BufSize = CPDF_ModuleMgr::Get()->m_FileBufSize;
1661 m_MetadataObjnum = 0;
1663 #if defined(_FPDFAPI_MINI_)
1664 m_bFileStream = TRUE;
1666 m_bFileStream = FALSE;
// Releases the file buffer with FX_Free().
// NOTE(review): any guard around the free is not visible in this listing.
1669 CPDF_SyntaxParser::~CPDF_SyntaxParser()
1672 FX_Free(m_pFileBuf);
// Reads one byte into `ch` via GetNextChar(), remembering the current m_Pos in
// save_pos first.
// NOTE(review): the lines that move m_Pos to `pos` and put it back afterwards
// are not visible in this listing - presumably the read position is unchanged
// on return; confirm.
1675 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, FX_BYTE& ch)
1677 FX_FILESIZE save_pos = m_Pos;
1679 FX_BOOL ret = GetNextChar(ch);
// Fetches the byte at file offset m_Pos + m_HeaderOffset into `ch`, going
// through the read buffer m_pFileBuf.
// The buffer is refilled from m_pFileAccess when the offset is at or before
// m_BufOffset, or at or beyond m_BufOffset + m_BufSize. The refill window
// starts at the requested offset and is shifted back so that it never extends
// past the end of the file.
// NOTE(review): the return statements and the advance of m_Pos are not
// visible in this listing.
1683 FX_BOOL CPDF_SyntaxParser::GetNextChar(FX_BYTE& ch)
1685 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
// Nothing to read at or past the end of the file.
1686 if (pos >= m_FileLen) {
1689 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
1690 FX_FILESIZE read_pos = pos;
1691 FX_DWORD read_size = m_BufSize;
// Never read more than the whole file.
1692 if ((FX_FILESIZE)read_size > m_FileLen) {
1693 read_size = (FX_DWORD)m_FileLen;
// Window would run past EOF: pull it back so it ends exactly at EOF.
1695 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
1696 if (m_FileLen < (FX_FILESIZE)read_size) {
1698 read_size = (FX_DWORD)m_FileLen;
1700 read_pos = m_FileLen - read_size;
1703 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
1706 m_BufOffset = read_pos;
1708 ch = m_pFileBuf[pos - m_BufOffset];
// Fetches the byte at `pos` (relative to the header offset) into `ch`, for
// callers scanning backwards through the file.
// When the buffer must be refilled, the window is placed so that it ends at
// the requested byte (read_pos = pos - m_BufSize + 1). The branch taken when
// pos is smaller than the buffer size is not visible in this listing. The
// window is also clamped so that it does not extend past the end of the file.
1712 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, FX_BYTE& ch)
1714 pos += m_HeaderOffset;
// Nothing to read at or past the end of the file.
1715 if (pos >= m_FileLen) {
1718 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
1719 FX_FILESIZE read_pos;
1720 if (pos < (FX_FILESIZE)m_BufSize) {
1723 read_pos = pos - m_BufSize + 1;
1725 FX_DWORD read_size = m_BufSize;
1726 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
1727 if (m_FileLen < (FX_FILESIZE)read_size) {
1729 read_size = (FX_DWORD)m_FileLen;
1731 read_pos = m_FileLen - read_size;
1734 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
1737 m_BufOffset = read_pos;
1739 ch = m_pFileBuf[pos - m_BufOffset];
// Reads `size` bytes straight from m_pFileAccess (bypassing m_pFileBuf) into
// pBuf, starting at file offset m_Pos + m_HeaderOffset.
// NOTE(review): the return statements and any update of m_Pos are not visible
// in this listing.
1742 FX_BOOL CPDF_SyntaxParser::ReadBlock(FX_LPBYTE pBuf, FX_DWORD size)
1744 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) {
// Capacity limit applied when appending characters to m_WordBuffer.
1750 #define MAX_WORD_BUFFER 256
// Reads the next token into m_WordBuffer / m_WordSize and sets m_bIsNumber.
// Characters are classified through the _PDF_CharType table; the values used
// here are 'W', 'D', 'R' and 'N'.
// Leading 'W' characters are skipped, and a run that ends at '\r' or '\n' is
// skipped as well. NOTE(review): presumably a '%' comment - the test that
// starts that run is not visible in this listing.
1751 void CPDF_SyntaxParser::GetNextWord()
1756 if (!GetNextChar(ch)) {
1759 FX_BYTE type = _PDF_CharType[ch];
1761 while (type == 'W') {
1762 if (!GetNextChar(ch)) {
1765 type = _PDF_CharType[ch];
1771 if (!GetNextChar(ch)) {
1774 if (ch == '\r' || ch == '\n') {
1778 type = _PDF_CharType[ch];
// Token starting with a delimiter-class character: never a number.
1781 m_bIsNumber = FALSE;
1782 m_WordBuffer[m_WordSize++] = ch;
// Keep appending while the characters are of type 'R' or 'N', up to
// MAX_WORD_BUFFER bytes.
1785 if (!GetNextChar(ch)) {
1788 type = _PDF_CharType[ch];
1789 if (type != 'R' && type != 'N') {
1793 if (m_WordSize < MAX_WORD_BUFFER) {
1794 m_WordBuffer[m_WordSize++] = ch;
// '<' and '>' read one more character, which may be appended to form a
// two-character token.
1797 } else if (ch == '<') {
1798 if (!GetNextChar(ch)) {
1802 m_WordBuffer[m_WordSize++] = ch;
1806 } else if (ch == '>') {
1807 if (!GetNextChar(ch)) {
1811 m_WordBuffer[m_WordSize++] = ch;
// Ordinary token: accumulate until a 'D' or 'W' character is met.
1819 if (m_WordSize < MAX_WORD_BUFFER) {
1820 m_WordBuffer[m_WordSize++] = ch;
1823 m_bIsNumber = FALSE;
1825 if (!GetNextChar(ch)) {
1828 type = _PDF_CharType[ch];
1829 if (type == 'D' || type == 'W') {
// Reads a literal (parenthesised) string starting at the current position and
// returns its decoded bytes.
// `parlevel` tracks the nesting of unescaped parentheses; the string ends at a
// ')' seen while parlevel is 0. Backslash escapes are decoded: n, r, t, b, f,
// octal digits (accumulated in iEscCode), and a backslash followed by a line
// ending. `status` is the state of the escape decoder.
// Returns an empty string when no character can be read at all, and whatever
// has been collected so far when the input ends inside the string.
1835 CFX_ByteString CPDF_SyntaxParser::ReadString()
1838 if (!GetNextChar(ch)) {
1839 return CFX_ByteString();
1841 CFX_ByteTextBuf buf;
1842 FX_INT32 parlevel = 0;
1843 FX_INT32 status = 0, iEscCode = 0;
// ')' at nesting level 0 terminates the string; a nested ')' is kept.
1848 if (parlevel == 0) {
1849 return buf.GetByteString();
1852 buf.AppendChar(')');
1853 } else if (ch == '(') {
1855 buf.AppendChar('(');
1856 } else if (ch == '\\') {
// First character after a backslash: octal digit or named escape.
1863 if (ch >= '0' && ch <= '7') {
1864 iEscCode = ch - '0';
1869 buf.AppendChar('\n');
1870 } else if (ch == 'r') {
1871 buf.AppendChar('\r');
1872 } else if (ch == 't') {
1873 buf.AppendChar('\t');
1874 } else if (ch == 'b') {
1875 buf.AppendChar('\b');
1876 } else if (ch == 'f') {
1877 buf.AppendChar('\f');
1878 } else if (ch == '\r') {
1881 } else if (ch == '\n') {
// Second octal digit.
1888 if (ch >= '0' && ch <= '7') {
1889 iEscCode = iEscCode * 8 + ch - '0';
1892 buf.AppendChar(iEscCode);
// Third octal digit completes the escape.
1898 if (ch >= '0' && ch <= '7') {
1899 iEscCode = iEscCode * 8 + ch - '0';
1900 buf.AppendChar(iEscCode);
1903 buf.AppendChar(iEscCode);
1915 if (!GetNextChar(ch)) {
1920 return buf.GetByteString();
// Reads a hexadecimal string starting at the current position and returns the
// decoded bytes.
// Hex digits (0-9, A-F, a-f) are consumed in pairs: the first digit of a pair
// supplies the high nibble (value * 16), the second is added as the low
// nibble and the byte is appended. bFirst tracks which half of the pair is
// expected.
// Returns an empty string when no character can be read at all.
// NOTE(review): the terminating test and the guard around the final
// AppendByte are not visible in this listing.
1922 CFX_ByteString CPDF_SyntaxParser::ReadHexString()
1925 if (!GetNextChar(ch)) {
1926 return CFX_ByteString();
1929 FX_BOOL bFirst = TRUE;
1935 if (ch >= '0' && ch <= '9') {
1937 code = (ch - '0') * 16;
1940 buf.AppendByte((FX_BYTE)code);
1943 } else if (ch >= 'A' && ch <= 'F') {
1945 code = (ch - 'A' + 10) * 16;
1947 code += ch - 'A' + 10;
1948 buf.AppendByte((FX_BYTE)code);
1951 } else if (ch >= 'a' && ch <= 'f') {
1953 code = (ch - 'a' + 10) * 16;
1955 code += ch - 'a' + 10;
1956 buf.AppendByte((FX_BYTE)code);
1960 if (!GetNextChar(ch)) {
1965 buf.AppendByte((FX_BYTE)code);
1967 return buf.GetByteString();
// Advances the read position character by character with GetNextChar().
// NOTE(review): judging by the name this stops after the next line ending,
// but the loop body is not visible in this listing - confirm.
1969 void CPDF_SyntaxParser::ToNextLine()
1973 if (!GetNextChar(ch)) {
// Skips 'W'-class characters (per _PDF_CharType), updating m_dwWordPos to the
// current m_Pos on each skipped character. A second loop skips a run of
// characters ending at '\r' or '\n'.
// NOTE(review): the second run is presumably a '%' comment - the test that
// starts it is not visible in this listing.
1990 void CPDF_SyntaxParser::ToNextWord()
1993 if (!GetNextChar(ch)) {
1996 FX_BYTE type = _PDF_CharType[ch];
1998 while (type == 'W') {
1999 m_dwWordPos = m_Pos;
2000 if (!GetNextChar(ch)) {
2003 type = _PDF_CharType[ch];
2009 if (!GetNextChar(ch)) {
2012 if (ch == '\r' || ch == '\n') {
2016 type = _PDF_CharType[ch];
// Returns the current contents of the word buffer (m_WordSize bytes) as a
// byte string.
//   bIsNumber - receives m_bIsNumber.
// NOTE(review): the call that fills the word buffer beforehand is not visible
// in this listing.
2020 CFX_ByteString CPDF_SyntaxParser::GetNextWord(FX_BOOL& bIsNumber)
2023 bIsNumber = m_bIsNumber;
2024 return CFX_ByteString((FX_LPCSTR)m_WordBuffer, m_WordSize);
// Returns the current contents of the word buffer (m_WordSize bytes) as a
// byte string, without reporting whether it is a number.
// NOTE(review): the call that fills the word buffer beforehand is not visible
// in this listing.
2026 CFX_ByteString CPDF_SyntaxParser::GetKeyword()
2029 return CFX_ByteString((FX_LPCSTR)m_WordBuffer, m_WordSize);
// Parses one PDF object at the current position, dispatching on its first
// word: a number (or an "N G R" reference), true/false, null, "(" literal
// string, "<" hex string, "[" array, "/name", or "<<" dictionary, which may be
// followed by a stream.
//   pObjList - object list handed to CPDF_Reference::Create().
//   objnum   - object number used for string decryption and ReadStream().
//   gennum   - generation number, used the same way.
//   level    - nesting depth; recursion passes level + 1 and depths above
//              _PARSER_OBJECT_LEVLE_ are rejected.
//   pContext - optional parse context; its PDFPARSE_TYPEONLY and
//              PDFPARSE_NOSTREAM flags are consulted and m_DictStart /
//              m_DictEnd are filled for dictionaries.
//   bDecrypt - strings are decrypted only when this is set and a crypto
//              handler is present.
// Several return statements yield a PDFOBJ_* type code cast to a pointer
// instead of an object. NOTE(review): their guards are not visible in this
// listing, presumably the bTypeOnly mode - confirm.
2031 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, FX_DWORD gennum, FX_INT32 level, PARSE_CONTEXT* pContext, FX_BOOL bDecrypt)
2033 if (level > _PARSER_OBJECT_LEVLE_) {
2036 FX_FILESIZE SavedPos = m_Pos;
2037 FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
2039 CFX_ByteString word = GetNextWord(bIsNumber);
2040 CPDF_Object* pRet = NULL;
2041 if (word.GetLength() == 0) {
2043 return (CPDF_Object*)PDFOBJ_INVALID;
2047 FX_FILESIZE wordOffset = m_Pos - word.GetLength();
// Number: look ahead two words to recognise the "<num> <gen> R" form.
2049 FX_FILESIZE SavedPos = m_Pos;
2050 CFX_ByteString nextword = GetNextWord(bIsNumber);
2052 CFX_ByteString nextword2 = GetNextWord(bIsNumber);
2053 if (nextword2 == FX_BSTRC("R")) {
// This local objnum (the referenced object) shadows the parameter.
2054 FX_DWORD objnum = FXSYS_atoi(word);
2056 return (CPDF_Object*)PDFOBJ_REFERENCE;
2058 pRet = CPDF_Reference::Create(pObjList, objnum);
2063 return (CPDF_Object*)PDFOBJ_NUMBER;
2065 pRet = CPDF_Number::Create(word);
2071 return (CPDF_Object*)PDFOBJ_NUMBER;
2073 pRet = CPDF_Number::Create(word);
2077 if (word == FX_BSTRC("true") || word == FX_BSTRC("false")) {
2079 return (CPDF_Object*)PDFOBJ_BOOLEAN;
2081 pRet = CPDF_Boolean::Create(word == FX_BSTRC("true"));
2084 if (word == FX_BSTRC("null")) {
2086 return (CPDF_Object*)PDFOBJ_NULL;
2088 pRet = CPDF_Null::Create();
// Literal string.
2091 if (word == FX_BSTRC("(")) {
2093 return (CPDF_Object*)PDFOBJ_STRING;
2095 FX_FILESIZE SavedPos = m_Pos - 1;
2096 CFX_ByteString str = ReadString();
2097 if (m_pCryptoHandler && bDecrypt) {
2098 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2100 pRet = CPDF_String::Create(str, FALSE);
// Hex string.
2103 if (word == FX_BSTRC("<")) {
2105 return (CPDF_Object*)PDFOBJ_STRING;
2107 FX_FILESIZE SavedPos = m_Pos - 1;
2108 CFX_ByteString str = ReadHexString();
2109 if (m_pCryptoHandler && bDecrypt) {
2110 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2112 pRet = CPDF_String::Create(str, TRUE);
// Array: elements are parsed recursively at level + 1.
2115 if (word == FX_BSTRC("[")) {
2117 return (CPDF_Object*)PDFOBJ_ARRAY;
2119 CPDF_Array* pArray = CPDF_Array::Create();
2120 FX_FILESIZE firstPos = m_Pos - 1;
2122 FX_FILESIZE SavedPos = m_Pos;
2123 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
// Name: the leading '/' is dropped before decoding.
2130 if (word[0] == '/') {
2132 return (CPDF_Object*)PDFOBJ_NAME;
2134 pRet = CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
// Dictionary.
2137 if (word == FX_BSTRC("<<")) {
2138 FX_FILESIZE saveDictOffset = m_Pos - 2;
2139 FX_DWORD dwDictSize = 0;
2141 return (CPDF_Object*)PDFOBJ_DICTIONARY;
2144 pContext->m_DictStart = SavedPos;
2146 CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
2148 FX_FILESIZE dwSignValuePos = 0;
2151 CFX_ByteString key = GetNextWord(bIsNumber);
2152 if (key.IsEmpty()) {
2156 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
// ">>" closes the dictionary; "endobj" is also treated as an end marker.
2157 if (key == FX_BSTRC(">>")) {
2158 dwDictSize = m_Pos - saveDictOffset;
2161 if (key == FX_BSTRC("endobj")) {
2162 dwDictSize = m_Pos - 6 - saveDictOffset;
2166 if (key[0] != '/') {
2170 key = PDF_NameDecode(key);
// Remember where the "/Contents" value begins; it is re-read below for
// signature dictionaries.
2171 if (key == FX_BSTRC("/Contents")) {
2172 dwSignValuePos = m_Pos;
2174 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
// Keys are stored without their leading '/'.
2178 if (key.GetLength() == 1) {
2179 pDict->SetAt(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
2182 pDict->SetAt(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
2184 pDict->AddValue(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
// Signature dictionary: parse "Contents" again with bDecrypt = FALSE and
// replace the stored value with that result.
2188 if (IsSignatureDict(pDict)) {
2189 FX_FILESIZE dwSavePos = m_Pos;
2190 m_Pos = dwSignValuePos;
2191 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1, NULL, FALSE);
2192 pDict->SetAt(FX_BSTRC("Contents"), pObj);
2196 pContext->m_DictEnd = m_Pos;
2197 if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
// A dictionary followed by the keyword "stream" is a stream object.
2201 FX_FILESIZE SavedPos = m_Pos;
2203 CFX_ByteString nextword = GetNextWord(bIsNumber);
2204 if (nextword == FX_BSTRC("stream")) {
2205 CPDF_Stream* pStream = ReadStream(pDict, pContext, objnum, gennum);
2216 if (word == FX_BSTRC(">>")) {
2221 return (CPDF_Object*)PDFOBJ_INVALID;
// Counterpart of GetObject(): parses one PDF object at the current position
// with the same dispatch on the first word, but returns each created object
// directly.
//   pObjList - object list handed to CPDF_Reference::Create().
//   objnum   - object number used for string decryption and ReadStream().
//   gennum   - generation number, used the same way.
//   level    - nesting depth, limited by _PARSER_OBJECT_LEVLE_.
//   pContext - optional parse context (flags, m_DictStart / m_DictEnd).
// Differences visible here: strings are decrypted whenever a crypto handler is
// present (there is no bDecrypt switch), and after each dictionary value one
// more character is read and compared with 0x0A / 0x0D.
// NOTE(review): array elements and dictionary values are parsed with
// GetObject(), not GetObjectByStrict() - confirm that this is intended.
2225 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, FX_DWORD gennum,
2226 FX_INT32 level, struct PARSE_CONTEXT* pContext)
2228 if (level > _PARSER_OBJECT_LEVLE_) {
2231 FX_FILESIZE SavedPos = m_Pos;
2232 FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
2234 CFX_ByteString word = GetNextWord(bIsNumber);
2235 if (word.GetLength() == 0) {
2237 return (CPDF_Object*)PDFOBJ_INVALID;
// Number: look ahead two words to recognise the "<num> <gen> R" form.
2242 FX_FILESIZE SavedPos = m_Pos;
2243 CFX_ByteString nextword = GetNextWord(bIsNumber);
2245 CFX_ByteString nextword2 = GetNextWord(bIsNumber);
2246 if (nextword2 == FX_BSTRC("R")) {
// This local objnum (the referenced object) shadows the parameter.
2247 FX_DWORD objnum = FXSYS_atoi(word);
2249 return (CPDF_Object*)PDFOBJ_REFERENCE;
2251 return CPDF_Reference::Create(pObjList, objnum);
2255 return (CPDF_Object*)PDFOBJ_NUMBER;
2257 return CPDF_Number::Create(word);
2262 return (CPDF_Object*)PDFOBJ_NUMBER;
2264 return CPDF_Number::Create(word);
2267 if (word == FX_BSTRC("true") || word == FX_BSTRC("false")) {
2269 return (CPDF_Object*)PDFOBJ_BOOLEAN;
2271 return CPDF_Boolean::Create(word == FX_BSTRC("true"));
2273 if (word == FX_BSTRC("null")) {
2275 return (CPDF_Object*)PDFOBJ_NULL;
2277 return CPDF_Null::Create();
// Literal string.
2279 if (word == FX_BSTRC("(")) {
2281 return (CPDF_Object*)PDFOBJ_STRING;
2283 CFX_ByteString str = ReadString();
2284 if (m_pCryptoHandler) {
2285 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2287 return CPDF_String::Create(str, FALSE);
// Hex string.
2289 if (word == FX_BSTRC("<")) {
2291 return (CPDF_Object*)PDFOBJ_STRING;
2293 CFX_ByteString str = ReadHexString();
2294 if (m_pCryptoHandler) {
2295 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2297 return CPDF_String::Create(str, TRUE);
// Array: elements are read until the word buffer starts with ']'.
2299 if (word == FX_BSTRC("[")) {
2301 return (CPDF_Object*)PDFOBJ_ARRAY;
2303 CPDF_Array* pArray = CPDF_Array::Create();
2305 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
2307 if (m_WordBuffer[0] == ']') {
// Name: the leading '/' is dropped before decoding.
2316 if (word[0] == '/') {
2318 return (CPDF_Object*)PDFOBJ_NAME;
2320 return CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
// Dictionary.
2322 if (word == FX_BSTRC("<<")) {
2324 return (CPDF_Object*)PDFOBJ_DICTIONARY;
2327 pContext->m_DictStart = SavedPos;
2329 CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
2332 FX_FILESIZE SavedPos = m_Pos;
2333 CFX_ByteString key = GetNextWord(bIsNumber);
2334 if (key.IsEmpty()) {
2338 if (key == FX_BSTRC(">>")) {
2341 if (key == FX_BSTRC("endobj")) {
2345 if (key[0] != '/') {
2348 key = PDF_NameDecode(key);
2349 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
// Peek at the character following the value and test it for LF / CR.
2354 if (!GetNextChar(ch)) {
2357 if (ch == 0x0A || ch == 0x0D) {
// Keys are stored without their leading '/'.
2363 if (key.GetLength() == 1) {
2364 pDict->SetAt(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
2366 pDict->AddValue(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
2370 pContext->m_DictEnd = m_Pos;
2371 if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
// A dictionary followed by the keyword "stream" is a stream object.
2375 FX_FILESIZE SavedPos = m_Pos;
2377 CFX_ByteString nextword = GetNextWord(bIsNumber);
2378 if (nextword == FX_BSTRC("stream")) {
2379 CPDF_Stream* pStream = ReadStream(pDict, pContext, objnum, gennum);
2390 if (word == FX_BSTRC(">>")) {
2395 return (CPDF_Object*)PDFOBJ_INVALID;
// Reads the data of a stream whose dictionary pDict has just been parsed and
// builds the CPDF_Stream object.
//   pDict    - stream dictionary; "Length" is read from it and rewritten when
//              the length has to be recomputed.
//   pContext - optional parse context; m_DataStart / m_DataEnd are filled.
//   objnum   - object number; selects the crypto handler and is compared with
//              a "Length" reference to reject a self-reference.
//   gennum   - generation number, passed to the crypto handler.
// The length is taken from "Length" unless that entry is a reference without
// an object list or a reference to this very object. Without a crypto handler
// the length is checked by looking for "endstream" right after the data; on a
// mismatch the stream is rescanned for "endstream" / "endobj" and the length
// recomputed from the tag that comes first.
2399 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, PARSE_CONTEXT* pContext,
2400 FX_DWORD objnum, FX_DWORD gennum)
2402 CPDF_Object* pLenObj = pDict->GetElement(FX_BSTRC("Length"));
2404 if (pLenObj && ((pLenObj->GetType() != PDFOBJ_REFERENCE) ||
2405 ((((CPDF_Reference*)pLenObj)->GetObjList() != NULL) &&
2406 ((CPDF_Reference*)pLenObj)->GetRefObjNum() != objnum))) {
2407 FX_FILESIZE pos = m_Pos;
2409 len = pLenObj->GetInteger();
// Lengths above 0x40000000 get special handling (body not visible here).
2412 if (len > 0x40000000) {
2417 FX_FILESIZE StreamStartPos = m_Pos;
2419 pContext->m_DataStart = m_Pos;
// The object numbered m_MetadataObjnum is read without a crypto handler.
2422 CPDF_CryptoHandler* pCryptoHandler = objnum == (FX_DWORD)m_MetadataObjnum ? NULL : m_pCryptoHandler;
2423 if (pCryptoHandler == NULL) {
2424 FX_FILESIZE SavedPos = m_Pos;
// The word after the data must start with "endstream"; otherwise rescan.
2426 if (m_WordSize < 9 || FXSYS_memcmp32(m_WordBuffer, "endstream", 9)) {
2427 m_Pos = StreamStartPos;
2428 FX_FILESIZE offset = FindTag(FX_BSTRC("endstream"), 0);
2430 FX_FILESIZE curPos = m_Pos;
2431 m_Pos = StreamStartPos;
2432 FX_FILESIZE endobjOffset = FindTag(FX_BSTRC("endobj"), 0);
// Prefer "endobj" when it is found before "endstream".
2433 if (endobjOffset < offset && endobjOffset >= 0) {
2434 offset = endobjOffset;
// Inspect the one or two bytes before the tag for a CR LF pair or a single
// CR / LF (the adjustment itself is not visible in this listing).
2438 FX_BYTE byte1, byte2;
2439 GetCharAt(StreamStartPos + offset - 1, byte1);
2440 GetCharAt(StreamStartPos + offset - 2, byte2);
2441 if (byte1 == 0x0a && byte2 == 0x0d) {
2443 } else if (byte1 == 0x0a || byte1 == 0x0d) {
2446 len = (FX_DWORD)offset;
2447 pDict->SetAtInteger(FX_BSTRC("Length"), len);
2449 m_Pos = StreamStartPos;
2450 if (FindTag(FX_BSTRC("endobj"), 0) < 0) {
2456 m_Pos = StreamStartPos;
2457 CPDF_Stream* pStream;
// This build configuration creates a file-backed stream; the other path
// below reads the data into memory.
2458 #if defined(_FPDFAPI_MINI_) && !defined(_FXCORE_FEATURE_ALL_)
2459 pStream = FX_NEW CPDF_Stream(m_pFileAccess, pCryptoHandler, m_HeaderOffset + m_Pos, len, pDict, gennum);
2462 FX_LPBYTE pData = FX_Alloc(FX_BYTE, len);
2466 ReadBlock(pData, len);
// Decrypt into dest_buf, then take over its buffer as the stream data.
2467 if (pCryptoHandler) {
2468 CFX_BinaryBuf dest_buf;
2469 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
2470 FX_LPVOID context = pCryptoHandler->DecryptStart(objnum, gennum);
2471 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
2472 pCryptoHandler->DecryptFinish(context, dest_buf);
2474 pData = dest_buf.GetBuffer();
2475 len = dest_buf.GetSize();
2476 dest_buf.DetachBuffer();
2478 pStream = FX_NEW CPDF_Stream(pData, len, pDict);
2481 pContext->m_DataEnd = pContext->m_DataStart + len;
// If the next word is exactly "endobj", rewind to just before it.
2483 StreamStartPos = m_Pos;
2485 if (m_WordSize == 6 && 0 == FXSYS_memcmp32(m_WordBuffer, "endobj", 6)) {
2486 m_Pos = StreamStartPos;
// Attaches the syntax parser to a file.
//   pFileAccess  - file to read from; its size becomes m_FileLen.
//   HeaderOffset - stored in m_HeaderOffset; read positions are offset by it.
// The previous buffer is freed and a new one of m_BufSize bytes is allocated.
// It is pre-filled from file offset 0 with min(m_BufSize, m_FileLen) bytes.
2490 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, FX_DWORD HeaderOffset)
2493 FX_Free(m_pFileBuf);
2496 m_pFileBuf = FX_Alloc(FX_BYTE, m_BufSize);
2497 m_HeaderOffset = HeaderOffset;
2498 m_FileLen = pFileAccess->GetSize();
2500 m_pFileAccess = pFileAccess;
2502 pFileAccess->ReadBlock(m_pFileBuf, 0, (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
// Converts the current word to an integer: the word buffer is NUL-terminated
// at m_WordSize and handed to FXSYS_atoi().
// NOTE(review): the lines that read the word and check m_bIsNumber are not
// visible in this listing. The terminator write needs m_WordBuffer to hold
// m_WordSize + 1 bytes - confirm against the buffer's declared size.
2504 FX_INT32 CPDF_SyntaxParser::GetDirectNum()
2510 m_WordBuffer[m_WordSize] = 0;
2511 return FXSYS_atoi((FX_LPCSTR)m_WordBuffer);
// Tests whether the tag found at startpos stands alone as a word, i.e. is not
// glued to a neighbouring 'N' or 'R' class character (per _PDF_CharType).
//   startpos - position of the first byte of the tag.
//   limit    - the right-hand neighbour is examined only while
//              startpos + taglen <= limit.
//   tag      - the tag bytes.
//   taglen   - number of bytes in tag.
// A side is checked only when the tag's own edge character on that side is
// neither a delimiter ('D') nor whitespace ('W'); such an edge already
// separates the tag from its neighbour.
2513 FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, FX_FILESIZE limit, FX_LPCBYTE tag, FX_DWORD taglen)
2515 FX_BYTE type = _PDF_CharType[tag[0]];
2516 FX_BOOL bCheckLeft = type != 'D' && type != 'W';
2517 type = _PDF_CharType[tag[taglen - 1]];
// Must be '&&' like bCheckLeft: with '||' the expression is always true (no
// value equals both 'D' and 'W'), so the right side was checked even for tags
// ending in a delimiter or whitespace.
2518 FX_BOOL bCheckRight = type != 'D' && type != 'W';
2520 if (bCheckRight && startpos + (FX_INT32)taglen <= limit && GetCharAt(startpos + (FX_INT32)taglen, ch)) {
2521 FX_BYTE type = _PDF_CharType[ch];
2522 if (type == 'N' || type == 'R') {
2526 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
2527 FX_BYTE type = _PDF_CharType[ch];
2528 if (type == 'N' || type == 'R') {
// Searches for `tag` starting at the current position.
//   tag        - byte sequence to find.
//   bWholeWord - when set, a hit must also pass IsWholeWord().
//   bForward   - search direction; forward reads with GetCharAt(), backward
//                with GetCharAtBackward().
//   limit      - search distance: scanning stops once pos reaches
//                m_Pos + limit (forward) or m_Pos - limit (backward).
// `offset` is the index of the tag byte expected next. On a mismatch it is
// reset, taking into account whether the mismatching byte could itself start
// a new match.
// NOTE(review): the return statements and the position update on success are
// not visible in this listing.
2534 FX_BOOL CPDF_SyntaxParser::SearchWord(FX_BSTR tag, FX_BOOL bWholeWord, FX_BOOL bForward, FX_FILESIZE limit)
2536 FX_INT32 taglen = tag.GetLength();
2540 FX_FILESIZE pos = m_Pos;
2541 FX_INT32 offset = 0;
2543 offset = taglen - 1;
2545 FX_LPCBYTE tag_data = tag;
2550 if (pos >= m_Pos + limit) {
2554 if (!GetCharAt(pos, byte)) {
2559 if (pos <= m_Pos - limit) {
2563 if (!GetCharAtBackward(pos, byte)) {
2567 if (byte == tag_data[offset]) {
2570 if (offset < taglen) {
// Full match: compute where the tag starts for the whole-word test.
2581 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
2582 if (!bWholeWord || IsWholeWord(startpos, limit, tag, taglen)) {
// Mismatch: restart, keeping this byte if it matches the tag's first byte
// (forward) or last byte (backward).
2588 offset = byte == tag_data[0] ? 1 : 0;
2591 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
// Per-tag matching state used by SearchMultiWord(), which reads and writes the
// members m_pTag, m_Len and m_Offset.
// NOTE(review): the member declarations are not visible in this listing.
2600 struct _SearchTagRecord {
// Searches forward from the current position for any one of several tags
// packed into `tags`.
//   tags       - the tags, split into one _SearchTagRecord each.
//                NOTE(review): the separator test is not visible in this
//                listing.
//   bWholeWord - when set, a hit must also pass IsWholeWord().
//   limit      - when non-zero, scanning stops once pos reaches m_Pos + limit.
// Every pattern keeps its own match offset. Each input byte is offered to all
// patterns, and a mismatching byte restarts that pattern at offset 1 when it
// equals the pattern's first byte, otherwise at 0. `found` starts at -1.
// NOTE(review): the assignment to `found`, the release of pPatterns and the
// return statement are not visible in this listing.
2605 FX_INT32 CPDF_SyntaxParser::SearchMultiWord(FX_BSTR tags, FX_BOOL bWholeWord, FX_FILESIZE limit)
2607 FX_INT32 ntags = 1, i;
2608 for (i = 0; i < tags.GetLength(); i ++)
2612 _SearchTagRecord* pPatterns = FX_Alloc(_SearchTagRecord, ntags);
2613 FX_DWORD start = 0, itag = 0, max_len = 0;
// The loop runs one step past the last character so the final tag is closed
// as well.
2614 for (i = 0; i <= tags.GetLength(); i ++) {
2616 FX_DWORD len = i - start;
2617 if (len > max_len) {
2620 pPatterns[itag].m_pTag = tags.GetPtr() + start;
2621 pPatterns[itag].m_Len = len;
2622 pPatterns[itag].m_Offset = 0;
2627 FX_FILESIZE pos = m_Pos;
2629 GetCharAt(pos++, byte);
2630 FX_INT32 found = -1;
2632 for (i = 0; i < ntags; i ++) {
2633 if (pPatterns[i].m_pTag[pPatterns[i].m_Offset] == byte) {
2634 pPatterns[i].m_Offset ++;
2635 if (pPatterns[i].m_Offset == pPatterns[i].m_Len) {
2636 if (!bWholeWord || IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag, pPatterns[i].m_Len)) {
2640 if (pPatterns[i].m_pTag[0] == byte) {
2641 pPatterns[i].m_Offset = 1;
2643 pPatterns[i].m_Offset = 0;
2648 if (pPatterns[i].m_pTag[0] == byte) {
2649 pPatterns[i].m_Offset = 1;
2651 pPatterns[i].m_Offset = 0;
2655 if (limit && pos >= m_Pos + limit) {
2658 if (!GetCharAt(pos, byte)) {
// Scans forward from the current position for `tag`, consuming characters
// with GetNextChar().
//   tag   - byte sequence to find.
//   limit - when non-zero, scanning stops once m_Pos equals limit.
// On a match, returns the distance from the starting position to the first
// byte of the tag (m_Pos - startpos - taglen); m_Pos is then just past the
// tag.
// NOTE(review): the not-found return value is not visible in this listing;
// ReadStream() treats a negative result as "not found". Also, limit is
// compared with the absolute m_Pos rather than with a distance from startpos -
// confirm that this is intended.
2667 FX_FILESIZE CPDF_SyntaxParser::FindTag(FX_BSTR tag, FX_FILESIZE limit)
2669 FX_INT32 taglen = tag.GetLength();
2672 FX_FILESIZE startpos = m_Pos;
2675 if (!GetNextChar(ch)) {
2678 if (ch == tag[match]) {
2680 if (match == taglen) {
2681 return m_Pos - startpos - taglen;
// Mismatch: restart, keeping this byte if it equals the tag's first byte.
2684 match = ch == tag[0] ? 1 : 0;
2686 if (limit && m_Pos == limit) {
// Copies bytes one at a time from the current position into `buffer` using
// GetNextChar(), stopping when `size` bytes have been stored or when no more
// characters can be read.
//   buffer - destination, at least `size` bytes.
//   size   - number of bytes requested.
// NOTE(review): since the byte is stored before the count is compared with
// size, a size of 0 would still write one byte unless an elided guard
// prevents it - confirm.
2692 void CPDF_SyntaxParser::GetBinary(FX_BYTE* buffer, FX_DWORD size)
2694 FX_DWORD offset = 0;
2697 if (!GetNextChar(ch)) {
2700 buffer[offset++] = ch;
2701 if (offset == size) {
// Constructs the availability checker for a progressively downloaded file.
//   pFileAvail - stored in m_pFileAvail.
//   pFileRead  - stored in m_pFileRead; its size initialises m_dwFileLen.
// All counters are zeroed, all flags set to FALSE and all owned pointers to
// NULL. The state machine starts at PDF_DATAAVAIL_HEADER, and the embedded
// parser is told that it does not own the file reader.
2706 CPDF_DataAvail::CPDF_DataAvail(IFX_FileAvail* pFileAvail, IFX_FileRead* pFileRead)
2708 m_pFileAvail = pFileAvail;
2709 m_pFileRead = pFileRead;
2713 m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize();
2715 m_dwCurrentOffset = 0;
2719 m_dwFirstPageNo = 0;
2722 m_dwCurrentXRefSteam = 0;
2723 m_dwAcroFormObjNum = 0;
2726 m_dwEncryptObjNum = 0;
2727 m_dwPrevXRefOffset = 0;
2728 m_dwLastXRefOffset = 0;
2729 m_bDocAvail = FALSE;
2730 m_bMainXRefLoad = FALSE;
// NOTE(review): m_bDocAvail was already set two lines above; this repeated
// assignment is redundant, or another member was meant - confirm.
2731 m_bDocAvail = FALSE;
2732 m_bLinearized = FALSE;
2733 m_bPagesLoad = FALSE;
2734 m_bPagesTreeLoad = FALSE;
2735 m_bMainXRefLoadedOK = FALSE;
2736 m_bAnnotsLoad = FALSE;
2737 m_bHaveAcroForm = FALSE;
2738 m_bAcroFormLoad = FALSE;
2739 m_bPageLoadedOK = FALSE;
2740 m_bNeedDownLoadResource = FALSE;
2741 m_bLinearizedFormParamLoad = FALSE;
2742 m_pLinearized = NULL;
2745 m_pCurrentParser = NULL;
2748 m_pPageResource = NULL;
2749 m_pageMapCheckState = NULL;
2750 m_docStatus = PDF_DATAAVAIL_HEADER;
2751 m_parser.m_bOwnFileRead = FALSE;
2752 m_bTotalLoadPageTree = FALSE;
2753 m_bCurPageDictLoadOK = FALSE;
2754 m_bLinearedDataOK = FALSE;
2755 m_pagesLoadState = NULL;
// Releases what the checker owns: the linearization object, the trailer, the
// two page-state maps, and every object held in m_arrayAcroforms.
// NOTE(review): the guards around some of the releases are not visible in
// this listing.
2757 CPDF_DataAvail::~CPDF_DataAvail()
2759 if (m_pLinearized) {
2760 m_pLinearized->Release();
2766 m_pTrailer->Release();
2768 if (m_pageMapCheckState) {
2769 delete m_pageMapCheckState;
2771 if (m_pagesLoadState) {
2772 delete m_pagesLoadState;
2775 FX_INT32 iSize = m_arrayAcroforms.GetSize();
2776 for (i = 0; i < iSize; ++i) {
2777 ((CPDF_Object *)m_arrayAcroforms.GetAt(i))->Release();
// NOTE(review): only the signature is visible in this listing; presumably it
// stores pDoc in m_pDocument, which other methods of this class read -
// confirm.
2780 void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc)
// Computes how many bytes the indirect object `objnum` spans in the file.
//   objnum - object number to look up in the document parser's tables.
//   offset - receives the object's file offset from m_CrossRef.
// An entry of V5 type 2 is first redirected to the object number stored in
// m_CrossRef. For entries of type 1 or 255 the offset is looked up in the
// sorted offset table, and the size is the distance to the next larger
// offset.
// NOTE(review): the failure returns (presumably 0) are not visible in this
// listing, and no visible line range-checks objnum again after the type-2
// redirection - confirm.
2784 FX_DWORD CPDF_DataAvail::GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset)
2786 CPDF_Parser *pParser = (CPDF_Parser *)(m_pDocument->GetParser());
2787 if (pParser == NULL) {
2790 if (objnum >= (FX_DWORD)pParser->m_CrossRef.GetSize()) {
2793 if (pParser->m_V5Type[objnum] == 2) {
2794 objnum = (FX_DWORD)pParser->m_CrossRef[objnum];
2796 if (pParser->m_V5Type[objnum] == 1 || pParser->m_V5Type[objnum] == 255) {
2797 offset = pParser->m_CrossRef[objnum];
2801 FX_LPVOID pResult = FXSYS_bsearch(&offset, pParser->m_SortedOffset.GetData(), pParser->m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
2802 if (pResult == NULL) {
// The last entry of the table has no successor to measure against.
2805 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)pParser->m_SortedOffset.GetData() == pParser->m_SortedOffset.GetSize() - 1) {
2808 return (FX_DWORD)(((FX_FILESIZE*)pResult)[1] - offset);
// Walks the object graph reachable from obj_array and checks that the file
// data of every referenced object has been downloaded.
//   obj_array  - objects to examine; replaced by the next level of objects
//                before the function recurses.
//   bParsePage - when FALSE, dictionaries whose "Type" is "Page" get special
//                handling (body not visible in this listing).
//   pHints     - receives AddSegment() requests for data not yet available.
//   ret_array  - receives the objects that could not be resolved.
// Arrays contribute their elements and dictionaries their values, except the
// "Parent" entry. References are resolved through the document once their
// byte range, padded by 512 bytes and clamped to the file length, is
// available. Object numbers already visited are tracked in m_objnum_array.
// NOTE(review): the return statements are not visible in this listing.
2812 FX_BOOL CPDF_DataAvail::IsObjectsAvail(CFX_PtrArray& obj_array, FX_BOOL bParsePage, IFX_DownloadHints* pHints, CFX_PtrArray &ret_array)
2814 if (!obj_array.GetSize()) {
2818 CFX_PtrArray new_obj_array;
2820 for (i = 0; i < obj_array.GetSize(); i++) {
2821 CPDF_Object *pObj = (CPDF_Object *)obj_array[i];
2825 FX_INT32 type = pObj->GetType();
2827 case PDFOBJ_ARRAY: {
2828 CPDF_Array *pArray = pObj->GetArray();
2829 for (FX_DWORD k = 0; k < pArray->GetCount(); k++) {
2830 new_obj_array.Add(pArray->GetElement(k));
// NOTE(review): the case label for this line is not visible in this listing;
// it appears to fall through into the dictionary case - confirm.
2835 pObj = pObj->GetDict();
2836 case PDFOBJ_DICTIONARY: {
2837 CPDF_Dictionary *pDict = pObj->GetDict();
2838 if (pDict->GetString("Type") == "Page" && !bParsePage) {
2841 FX_POSITION pos = pDict->GetStartPos();
2845 value = pDict->GetNextElement(pos, key);
// "Parent" is not queued for traversal.
2846 if (key != "Parent") {
2847 new_obj_array.Add(value);
2852 case PDFOBJ_REFERENCE: {
2853 CPDF_Reference *pRef = (CPDF_Reference*)pObj;
2854 FX_DWORD dwNum = pRef->GetRefObjNum();
2856 FX_DWORD size = GetObjectSize(pRef->GetRefObjNum(), offset);
// Request 512 bytes beyond the object, clamped to the end of the file.
2860 size = (FX_DWORD)((FX_FILESIZE)(offset + size + 512) > m_dwFileLen ? m_dwFileLen - offset : size + 512);
2861 if (!m_pFileAvail->IsDataAvail(offset, size)) {
2862 pHints->AddSegment(offset, size);
2863 ret_array.Add(pObj);
2865 } else if (!m_objnum_array.Find(dwNum)) {
2866 m_objnum_array.AddObjNum(dwNum);
2867 CPDF_Object *pReferred = m_pDocument->GetIndirectObject(pRef->GetRefObjNum(), NULL);
2869 new_obj_array.Add(pReferred);
// Hand the queued objects back through ret_array; a reference is added only
// if its object number has not been visited yet.
2877 FX_INT32 iSize = new_obj_array.GetSize();
2878 for (i = 0; i < iSize; ++i) {
2879 CPDF_Object *pObj = (CPDF_Object *)new_obj_array[i];
2880 FX_INT32 type = pObj->GetType();
2881 if (type == PDFOBJ_REFERENCE) {
2882 CPDF_Reference *pRef = (CPDF_Reference *)pObj;
2883 FX_DWORD dwNum = pRef->GetRefObjNum();
2884 if (!m_objnum_array.Find(dwNum)) {
2885 ret_array.Add(pObj);
2888 ret_array.Add(pObj);
// Recurse on the next level of the graph.
2893 obj_array.RemoveAll();
2894 obj_array.Append(new_obj_array);
2895 return IsObjectsAvail(obj_array, FALSE, pHints, ret_array);
// Drives the document availability state machine: CheckDocStatus() is called
// repeatedly until m_bDocAvail is set or a step reports failure.
//   pHints - passed to every CheckDocStatus() call.
// If the file length is still 0 and a file reader exists, the length is
// queried again first.
// NOTE(review): the return statements are not visible in this listing.
2897 FX_BOOL CPDF_DataAvail::IsDocAvail(IFX_DownloadHints* pHints)
2899 if (!m_dwFileLen && m_pFileRead) {
2900 m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize();
2905 while (!m_bDocAvail) {
2906 if (!CheckDocStatus(pHints)) {
// Checks that every object reachable from the AcroForm objects is available.
//   pHints - passed to IsObjectsAvail() for download requests.
// On the first call (m_objs_array empty) the walk starts from
// m_arrayAcroforms; later calls continue from the objects left pending in
// m_objs_array. The AcroForm objects are released and their array cleared on
// one of the outcomes. NOTE(review): the conditions on bRet are not visible
// in this listing.
2912 FX_BOOL CPDF_DataAvail::CheckAcroFormSubObject(IFX_DownloadHints* pHints)
2914 if (!m_objs_array.GetSize()) {
2915 m_objs_array.RemoveAll();
2916 m_objnum_array.RemoveAll();
2917 CFX_PtrArray obj_array;
2918 obj_array.Append(m_arrayAcroforms);
2919 FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
2921 m_objs_array.RemoveAll();
2925 CFX_PtrArray new_objs_array;
2926 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
2928 FX_INT32 iSize = m_arrayAcroforms.GetSize();
2929 for (FX_INT32 i = 0; i < iSize; ++i) {
2930 ((CPDF_Object *)m_arrayAcroforms.GetAt(i))->Release();
2932 m_arrayAcroforms.RemoveAll();
// Carry the still-pending objects over to the next call.
2934 m_objs_array.RemoveAll();
2935 m_objs_array.Append(new_objs_array);
// State-machine step for PDF_DATAAVAIL_ACROFORM: fetches the AcroForm object
// (m_dwAcroFormObjNum) through GetObject().
//   pHints - passed to GetObject() for download requests.
// The visible outcomes are: advance to PDF_DATAAVAIL_PAGETREE, turn an error
// state into PDF_DATAAVAIL_LOADALLFILE, or record the object in
// m_arrayAcroforms and advance to PDF_DATAAVAIL_PAGETREE.
// NOTE(review): the conditions selecting each outcome and the return
// statements are not visible in this listing.
2940 FX_BOOL CPDF_DataAvail::CheckAcroForm(IFX_DownloadHints* pHints)
2942 FX_BOOL bExist = FALSE;
2943 m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist);
2945 m_docStatus = PDF_DATAAVAIL_PAGETREE;
2949 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
2950 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
2955 m_arrayAcroforms.Add(m_pAcroForm);
2956 m_docStatus = PDF_DATAAVAIL_PAGETREE;
// Performs one step of the document availability state machine by
// dispatching on m_docStatus to the matching Check* / Load* handler.
//   pHints - passed on to the selected handler.
// Returns the handler's result for the states that return directly.
// PDF_DATAAVAIL_ERROR falls back to LoadAllFile(). For the page-tree and page
// states the handler chosen depends on m_bTotalLoadPageTree.
// NOTE(review): the default case and the returns after the two m_docStatus
// assignments are not visible in this listing.
2959 FX_BOOL CPDF_DataAvail::CheckDocStatus(IFX_DownloadHints *pHints)
2961 switch (m_docStatus) {
2962 case PDF_DATAAVAIL_HEADER:
2963 return CheckHeader(pHints);
2964 case PDF_DATAAVAIL_FIRSTPAGE:
2965 case PDF_DATAAVAIL_FIRSTPAGE_PREPARE:
2966 return CheckFirstPage(pHints);
2967 case PDF_DATAAVAIL_END:
2968 return CheckEnd(pHints);
2969 case PDF_DATAAVAIL_CROSSREF:
2970 return CheckCrossRef(pHints);
2971 case PDF_DATAAVAIL_CROSSREF_ITEM:
2972 return CheckCrossRefItem(pHints);
2973 case PDF_DATAAVAIL_CROSSREF_STREAM:
2974 return CheckAllCrossRefStream(pHints);
2975 case PDF_DATAAVAIL_TRAILER:
2976 return CheckTrailer(pHints);
2977 case PDF_DATAAVAIL_TRAILER_APPEND:
2978 return CheckTrailerAppend(pHints);
2979 case PDF_DATAAVAIL_LOADALLCRSOSSREF:
2980 return LoadAllXref(pHints);
2981 case PDF_DATAAVAIL_LOADALLFILE:
2982 return LoadAllFile(pHints);
2983 case PDF_DATAAVAIL_ROOT:
2984 return CheckRoot(pHints);
2985 case PDF_DATAAVAIL_INFO:
2986 return CheckInfo(pHints);
2987 case PDF_DATAAVAIL_ACROFORM:
2988 return CheckAcroForm(pHints);
2989 case PDF_DATAAVAIL_PAGETREE:
2990 if (m_bTotalLoadPageTree) {
2991 return CheckPages(pHints);
2993 return LoadDocPages(pHints);
2995 case PDF_DATAAVAIL_PAGE:
2996 if (m_bTotalLoadPageTree) {
2997 return CheckPage(pHints);
2999 m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
3002 case PDF_DATAAVAIL_ERROR:
3003 return LoadAllFile(pHints);
3004 case PDF_DATAAVAIL_PAGE_LATERLOAD:
3005 m_docStatus = PDF_DATAAVAIL_PAGE;
// Page-tree counterpart of CheckDocStatus(): dispatches on m_docStatus for
// the PAGETREE, PAGE and ERROR states. The remaining branch marks both the
// page tree and the pages as loaded.
3011 FX_BOOL CPDF_DataAvail::CheckPageStatus(IFX_DownloadHints* pHints)
3013 switch (m_docStatus) {
3014 case PDF_DATAAVAIL_PAGETREE:
3015 return CheckPages(pHints);
3016 case PDF_DATAAVAIL_PAGE:
3017 return CheckPage(pHints);
3018 case PDF_DATAAVAIL_ERROR:
3019 return LoadAllFile(pHints);
// m_bPagesTreeLoad is the flag LoadPages() loops on.
3021 m_bPagesTreeLoad = TRUE;
3022 m_bPagesLoad = TRUE;
// Fallback step: treats the document as available once the byte range
// [0, m_dwFileLen) is present, and otherwise requests that whole range.
3026 FX_BOOL CPDF_DataAvail::LoadAllFile(IFX_DownloadHints* pHints)
// The length is narrowed to FX_DWORD for both the query and the request.
3028 if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) {
3029 m_docStatus = PDF_DATAAVAIL_DONE;
3032 pHints->AddSegment(0, (FX_DWORD)m_dwFileLen);
// Loads every cross-reference table/stream through the embedded m_parser,
// starting at m_dwLastXRefOffset, then records the root and info object
// numbers and advances to PDF_DATAAVAIL_ROOT.
3035 FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints)
3037 m_parser.m_Syntax.InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset);
// The parser must not release m_pFileRead: CloseParser() only releases the
// file access object when m_bOwnFileRead is set.
3038 m_parser.m_bOwnFileRead = FALSE;
// Try the V4 loader first, then the V5 loader; if both fail, fall back to
// loading the whole file.
3039 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
3040 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
// Sort the collected object offsets in ascending order.
3043 FXSYS_qsort(m_parser.m_SortedOffset.GetData(), m_parser.m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
3044 m_dwRootObjNum = m_parser.GetRootObjNum();
3045 m_dwInfoObjNum = m_parser.GetInfoObjNum();
3046 m_pCurrentParser = &m_parser;
3047 m_docStatus = PDF_DATAAVAIL_ROOT;
// Returns the parsed indirect object objnum, or NULL when it could not be
// produced. If the bytes are not available yet, the needed range is added to
// pHints. *pExistInFile reports whether the object is considered present in
// the file.
//
// Two paths: before a document exists (m_pDocument == NULL) the embedded
// m_parser is used; afterwards the document's own parser is used.
3050 CPDF_Object* CPDF_DataAvail::GetObject(FX_DWORD objnum, IFX_DownloadHints* pHints, FX_BOOL *pExistInFile)
3052 CPDF_Object *pRet = NULL;
3054 *pExistInFile = TRUE;
3056 if (m_pDocument == NULL) {
3057 FX_FILESIZE offset = m_parser.GetObjectOffset(objnum);
3059 *pExistInFile = FALSE;
3062 FX_DWORD size = (FX_DWORD)m_parser.GetObjectSize(objnum);
// Ask for the object plus 512 bytes of slack, clamped to the end of file.
3063 size = (FX_DWORD)(((FX_FILESIZE)(offset + size + 512)) > m_dwFileLen ? m_dwFileLen - offset : size + 512);
3064 if (!m_pFileAvail->IsDataAvail(offset, size)) {
3065 pHints->AddSegment(offset, size);
3068 pRet = m_parser.ParseIndirectObject(NULL, objnum);
// Data was available but parsing produced nothing: report "not in file".
// This condition tests the pointer itself, not the pointed-to flag.
3069 if (!pRet && pExistInFile) {
3070 *pExistInFile = FALSE;
// Document path: same size/clamp/request logic, using this class's
// GetObjectSize() and the document's parser.
3075 FX_DWORD size = GetObjectSize(objnum, offset);
3076 size = (FX_DWORD)((FX_FILESIZE)(offset + size + 512) > m_dwFileLen ? m_dwFileLen - offset : size + 512);
3077 if (!m_pFileAvail->IsDataAvail(offset, size)) {
3078 pHints->AddSegment(offset, size);
3081 CPDF_Parser *pParser = (CPDF_Parser *)(m_pDocument->GetParser());
3082 pRet = pParser->ParseIndirectObject(NULL, objnum, NULL);
3083 if (!pRet && pExistInFile) {
3084 *pExistInFile = FALSE;
// Fetches the document information object (m_dwInfoObjNum) and selects the
// next state: PDF_DATAAVAIL_ACROFORM when the catalog referenced an AcroForm,
// PDF_DATAAVAIL_PAGETREE otherwise.
3088 FX_BOOL CPDF_DataAvail::CheckInfo(IFX_DownloadHints* pHints)
3090 FX_BOOL bExist = FALSE;
3091 CPDF_Object *pInfo = GetObject(m_dwInfoObjNum, pHints, &bExist);
// First place where the next state is chosen (presumably the "object does
// not exist in the file" path -- confirm against bExist).
3093 if (m_bHaveAcroForm) {
3094 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3096 m_docStatus = PDF_DATAAVAIL_PAGETREE;
// An error raised while fetching the object becomes "load the whole file".
3101 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3102 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
// The read position has reached the end of the file: flag an error.
3105 if (m_Pos == m_dwFileLen) {
3106 m_docStatus = PDF_DATAAVAIL_ERROR;
// Second place where the same next-state choice is made.
3113 if (m_bHaveAcroForm) {
3114 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3116 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3120 FX_BOOL CPDF_DataAvail::CheckRoot(IFX_DownloadHints* pHints)
3122 FX_BOOL bExist = FALSE;
3123 m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist);
3125 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3129 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3130 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3135 CPDF_Reference* pRef = (CPDF_Reference*)m_pRoot->GetDict()->GetElement(FX_BSTRC("Pages"));
3136 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
3137 m_docStatus = PDF_DATAAVAIL_ERROR;
3140 m_PagesObjNum = pRef->GetRefObjNum();
3141 CPDF_Reference* pAcroFormRef = (CPDF_Reference*)m_pRoot->GetDict()->GetElement(FX_BSTRC("AcroForm"));
3142 if (pAcroFormRef && pAcroFormRef->GetType() == PDFOBJ_REFERENCE) {
3143 m_bHaveAcroForm = TRUE;
3144 m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum();
3146 if (m_dwInfoObjNum) {
3147 m_docStatus = PDF_DATAAVAIL_INFO;
3149 if (m_bHaveAcroForm) {
3150 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3152 m_docStatus = PDF_DATAAVAIL_PAGETREE;
// Reads the /Pages reference from the loaded document's root dictionary,
// stores its object number in m_PagesObjNum, switches m_pCurrentParser to the
// document's parser and moves the state machine to PDF_DATAAVAIL_PAGETREE.
3157 FX_BOOL CPDF_DataAvail::PreparePageItem()
// NOTE(review): pRoot is used on the next line with no NULL check visible
// here -- confirm that GetRoot() cannot return NULL at this point.
3159 CPDF_Dictionary *pRoot = m_pDocument->GetRoot();
3160 CPDF_Reference* pRef = (CPDF_Reference*)pRoot->GetElement(FX_BSTRC("Pages"));
// /Pages must be an indirect reference; anything else is an error.
3161 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
3162 m_docStatus = PDF_DATAAVAIL_ERROR;
3165 m_PagesObjNum = pRef->GetRefObjNum();
3166 m_pCurrentParser = (CPDF_Parser *)m_pDocument->GetParser();
3167 m_docStatus = PDF_DATAAVAIL_PAGETREE;
// Tracks, per page index, whether the page has been checked before, using
// the lazily created map m_pageMapCheckState. Every visible path stores 1
// for iPage.
3170 FX_BOOL CPDF_DataAvail::IsFirstCheck(int iPage)
// Create the map on first use.
3172 if (NULL == m_pageMapCheckState) {
3173 m_pageMapCheckState = FX_NEW CFX_CMapDWordToDWord();
3175 FX_DWORD dwValue = 0;
// No entry yet for this page: record it.
3176 if (!m_pageMapCheckState->Lookup(iPage, dwValue)) {
3177 m_pageMapCheckState->SetAt(iPage, 1);
// An entry exists (ResetFirstCheck() stores 0): mark the page again.
3183 m_pageMapCheckState->SetAt(iPage, 1);
// Clears the "already checked" marker that IsFirstCheck() keeps for iPage by
// storing 0 for it in m_pageMapCheckState.
3186 void CPDF_DataAvail::ResetFirstCheck(int iPage)
// Create the map on first use.
3188 if (NULL == m_pageMapCheckState) {
3189 m_pageMapCheckState = FX_NEW CFX_CMapDWordToDWord();
3191 FX_DWORD dwValue = 1;
// A page without an entry is handled in this branch (presumably an early
// return -- confirm).
3192 if (!m_pageMapCheckState->Lookup(iPage, dwValue)) {
3195 m_pageMapCheckState->SetAt(iPage, 0);
3197 FX_BOOL CPDF_DataAvail::CheckPage(IFX_DownloadHints* pHints)
3200 FX_DWORD iLen = m_PageObjList.GetSize();
3201 CFX_DWordArray UnavailObjList;
3202 for (; i < iLen; ++i) {
3203 FX_DWORD dwPageObjNum = m_PageObjList.GetAt(i);
3204 FX_BOOL bExist = FALSE;
3205 CPDF_Object *pObj = GetObject(dwPageObjNum, pHints, &bExist);
3208 UnavailObjList.Add(dwPageObjNum);
3212 if (pObj->GetType() == PDFOBJ_ARRAY) {
3213 CPDF_Array *pArray = pObj->GetArray();
3215 FX_INT32 iSize = pArray->GetCount();
3216 CPDF_Object *pItem = NULL;
3217 for (FX_INT32 j = 0; j < iSize; ++j) {
3218 pItem = pArray->GetElement(j);
3219 if (pItem && pItem->GetType() == PDFOBJ_REFERENCE) {
3220 UnavailObjList.Add(((CPDF_Reference *)pItem)->GetRefObjNum());
3225 if (pObj->GetType() != PDFOBJ_DICTIONARY) {
3229 CFX_ByteString type = pObj->GetDict()->GetString(FX_BSTRC("Type"));
3230 if (type == FX_BSTRC("Pages")) {
3231 m_PagesArray.Add(pObj);
3236 m_PageObjList.RemoveAll();
3237 if (UnavailObjList.GetSize()) {
3238 m_PageObjList.Append(UnavailObjList);
3242 iLen = m_PagesArray.GetSize();
3243 for (; i < iLen; ++i) {
3244 CPDF_Object *pPages = (CPDF_Object *)m_PagesArray.GetAt(i);
3248 if (!GetPageKids(m_pCurrentParser, pPages)) {
3250 while (i++ < iLen) {
3251 pPages = (CPDF_Object *)m_PagesArray.GetAt(i);
3254 m_PagesArray.RemoveAll();
3255 m_docStatus = PDF_DATAAVAIL_ERROR;
3260 m_PagesArray.RemoveAll();
3261 if (!m_PageObjList.GetSize()) {
3262 m_docStatus = PDF_DATAAVAIL_DONE;
3267 FX_BOOL CPDF_DataAvail::GetPageKids(CPDF_Parser *pParser, CPDF_Object *pPages)
3270 m_docStatus = PDF_DATAAVAIL_ERROR;
3273 CPDF_Object *pKids = pPages->GetDict()->GetElement(FX_BSTRC("Kids"));
3277 switch (pKids->GetType()) {
3278 case PDFOBJ_REFERENCE: {
3279 CPDF_Reference *pKid = (CPDF_Reference *)pKids;
3280 m_PageObjList.Add(pKid->GetRefObjNum());
3283 case PDFOBJ_ARRAY: {
3284 CPDF_Array *pKidsArray = (CPDF_Array *)pKids;
3285 for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
3286 CPDF_Reference *pKid = (CPDF_Reference *)pKidsArray->GetElement(i);
3287 m_PageObjList.Add(pKid->GetRefObjNum());
3292 m_docStatus = PDF_DATAAVAIL_ERROR;
// Fetches the root page-tree node (m_PagesObjNum) and queues its children via
// GetPageKids(); on success the state machine moves to PDF_DATAAVAIL_PAGE.
3297 FX_BOOL CPDF_DataAvail::CheckPages(IFX_DownloadHints* pHints)
3299 FX_BOOL bExist = FALSE;
3300 CPDF_Object *pPages = GetObject(m_PagesObjNum, pHints, &bExist);
// First fallback to loading the whole file (presumably when the object does
// not exist in the file -- confirm against bExist).
3302 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
// An error raised while fetching the node also becomes "load the whole
// file".
3306 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3307 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
// NOTE(review): bNeedLoad is not referenced by any other visible line of
// this function.
3312 FX_BOOL bNeedLoad = FALSE;
3313 if (!GetPageKids(m_pCurrentParser, pPages)) {
3315 m_docStatus = PDF_DATAAVAIL_ERROR;
3319 m_docStatus = PDF_DATAAVAIL_PAGE;
// Inspects the first bytes of the file (at most 1024) to decide between the
// linearized path (PDF_DATAAVAIL_FIRSTPAGE) and the regular path that starts
// from the end of the file (PDF_DATAAVAIL_END). If the bytes are not there
// yet, they are requested through pHints.
3322 FX_BOOL CPDF_DataAvail::CheckHeader(IFX_DownloadHints* pHints)
3324 FX_DWORD req_size = 1024;
// Never ask for more than the file contains.
3325 if ((FX_FILESIZE)req_size > m_dwFileLen) {
3326 req_size = (FX_DWORD)m_dwFileLen;
3328 if (m_pFileAvail->IsDataAvail(0, req_size)) {
// req_size is at most 1024, so the read fits the buffer.
3329 FX_BYTE buffer[1024];
3330 m_pFileRead->ReadBlock(buffer, 0, req_size);
3331 if (IsLinearizedFile(buffer, req_size)) {
3332 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE;
// IsLinearizedFile() can set PDF_DATAAVAIL_ERROR itself; that case is
// handled separately before falling through to the END state.
3334 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3337 m_docStatus = PDF_DATAAVAIL_END;
3341 pHints->AddSegment(0, req_size);
// Linearized path: uses the /E, /T and /L entries of the linearization
// dictionary (m_pLinearized) to make sure the first-page section and the
// region from the main cross-reference offset to the end of the file are
// available, requesting whatever is missing through pHints.
// NOTE(review): m_pLinearized->GetDict() is dereferenced on each lookup with
// no NULL check visible here.
3344 FX_BOOL CPDF_DataAvail::CheckFirstPage(IFX_DownloadHints *pHints)
3346 FX_DWORD dwFirstPageEndOffset = 0;
// All three entries are required; each missing one sets the error state.
3347 CPDF_Object *pEndOffSet = m_pLinearized->GetDict()->GetElement(FX_BSTRC("E"));
3349 m_docStatus = PDF_DATAAVAIL_ERROR;
3352 CPDF_Object *pXRefOffset = m_pLinearized->GetDict()->GetElement(FX_BSTRC("T"));
3354 m_docStatus = PDF_DATAAVAIL_ERROR;
3357 CPDF_Object *pFileLen = m_pLinearized->GetDict()->GetElement(FX_BSTRC("L"));
3359 m_docStatus = PDF_DATAAVAIL_ERROR;
3362 FX_BOOL bNeedDownLoad = FALSE;
3363 if (pEndOffSet->GetType() == PDFOBJ_NUMBER) {
3364 FX_DWORD dwEnd = pEndOffSet->GetInteger();
// Clamp the end offset to the file length.
3366 if ((FX_FILESIZE)dwEnd > m_dwFileLen) {
3367 dwEnd = (FX_DWORD)m_dwFileLen;
// The first 1024 bytes were already examined by the header check, so the
// requested range starts after them.
3369 FX_INT32 iStartPos = (FX_INT32)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
3370 FX_INT32 iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
3371 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
3372 pHints->AddSegment(iStartPos, iSize);
3373 bNeedDownLoad = TRUE;
3376 m_dwLastXRefOffset = 0;
3377 FX_FILESIZE dwFileLen = 0;
3378 if (pXRefOffset->GetType() == PDFOBJ_NUMBER) {
3379 m_dwLastXRefOffset = pXRefOffset->GetInteger();
3381 if (pFileLen->GetType() == PDFOBJ_NUMBER) {
3382 dwFileLen = pFileLen->GetInteger();
// Region from the main cross-reference offset to the declared file length.
// NOTE(review): both values come from the file; the subtraction is not
// range-checked on the visible lines.
3384 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, (FX_DWORD)(dwFileLen - m_dwLastXRefOffset))) {
3385 if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) {
3386 FX_DWORD dwSize = (FX_DWORD)(dwFileLen - m_dwLastXRefOffset);
3387 FX_FILESIZE offset = m_dwLastXRefOffset;
// For a short tail the request is moved to start 512 bytes before the end
// of the file.
3388 if (dwSize < 512 && dwFileLen > 512) {
3390 offset = dwFileLen - 512;
3392 pHints->AddSegment(offset, dwSize);
3395 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
// Finished only when nothing had to be requested for the first page and the
// PREPARE state has been reached.
3397 if (!bNeedDownLoad && m_docStatus == PDF_DATAAVAIL_FIRSTPAGE_PREPARE) {
3398 m_docStatus = PDF_DATAAVAIL_DONE;
3401 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
// Parses the indirect object that starts at file position pos using
// m_syntaxParser: reads an object number and a generation number, expects the
// keyword "obj", then reads the object body. The parser position saved on
// entry is restored on the visible exit paths.
//
// objnum  Expected object number; 0 disables the comparison with the number
//         read from the file.
3404 CPDF_Object * CPDF_DataAvail::ParseIndirectObjectAt(FX_FILESIZE pos, FX_DWORD objnum)
3406 FX_FILESIZE SavedPos = m_syntaxParser.SavePos();
3407 m_syntaxParser.RestorePos(pos);
3409 CFX_ByteString word = m_syntaxParser.GetNextWord(bIsNumber);
3413 FX_DWORD real_objnum = FXSYS_atoi(word);
// Reject an object whose number differs from the one the caller asked for.
3414 if (objnum && real_objnum != objnum) {
3417 word = m_syntaxParser.GetNextWord(bIsNumber);
3421 FX_DWORD gennum = FXSYS_atoi(word);
3422 if (m_syntaxParser.GetKeyword() != FX_BSTRC("obj")) {
3423 m_syntaxParser.RestorePos(SavedPos);
// The caller-supplied objnum, not real_objnum, is passed to the object
// parser.
3426 CPDF_Object* pObj = m_syntaxParser.GetObject(NULL, objnum, gennum, 0);
3427 m_syntaxParser.RestorePos(SavedPos);
// Reports whether the file is linearized, based on its first 1024 bytes.
// Returns PDF_IS_LINEARIZED, PDF_NOT_LINEARIZED, or PDF_UNKNOW_LINEARIZED
// when those bytes are not available or the file is shorter than 1024 bytes.
3430 FX_INT32 CPDF_DataAvail::IsLinearizedPDF()
3432 FX_DWORD req_size = 1024;
3433 if (!m_pFileAvail->IsDataAvail(0, req_size)) {
3434 return PDF_UNKNOW_LINEARIZED;
3437 return PDF_NOT_LINEARIZED;
3439 FX_FILESIZE dwSize = m_pFileRead->GetSize();
3440 if (dwSize < (FX_FILESIZE)req_size) {
3441 return PDF_UNKNOW_LINEARIZED;
// req_size is exactly 1024 here, so the read fits the buffer.
3443 FX_BYTE buffer[1024];
3444 m_pFileRead->ReadBlock(buffer, 0, req_size);
3445 if (IsLinearizedFile(buffer, req_size)) {
3446 return PDF_IS_LINEARIZED;
3448 return PDF_NOT_LINEARIZED;
3450 FX_BOOL CPDF_DataAvail::IsLinearizedFile(FX_LPBYTE pData, FX_DWORD dwLen)
3452 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(pData, (size_t)dwLen, FALSE));
3453 FX_INT32 offset = GetHeaderOffset((IFX_FileStream*)file);
3455 m_docStatus = PDF_DATAAVAIL_ERROR;
3458 m_dwHeaderOffset = offset;
3459 m_syntaxParser.InitParser((IFX_FileStream*)file, offset);
3460 m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9);
3461 FX_BOOL bNumber = FALSE;
3462 FX_FILESIZE dwSavePos = m_syntaxParser.SavePos();
3463 CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(bNumber);
3467 FX_DWORD objnum = FXSYS_atoi(wordObjNum);
3468 if (m_pLinearized) {
3469 m_pLinearized->Release();
3470 m_pLinearized = NULL;
3472 m_pLinearized = ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum);
3473 if (!m_pLinearized) {
3476 if (m_pLinearized->GetDict()->GetElement(FX_BSTRC("Linearized"))) {
3477 CPDF_Object *pLen = m_pLinearized->GetDict()->GetElement(FX_BSTRC("L"));
3481 if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize()) {
3484 m_bLinearized = TRUE;
3485 CPDF_Object *pNo = m_pLinearized->GetDict()->GetElement(FX_BSTRC("P"));
3486 if (pNo && pNo->GetType() == PDFOBJ_NUMBER) {
3487 m_dwFirstPageNo = pNo->GetInteger();
// Regular (non-linearized) path: reads the last bytes of the file (at most
// 1024), searches backwards for "startxref" and takes the number that follows
// as the offset of the last cross-reference section. On success the state
// machine moves to PDF_DATAAVAIL_CROSSREF. If the bytes are not available
// yet, they are requested through pHints.
3493 FX_BOOL CPDF_DataAvail::CheckEnd(IFX_DownloadHints* pHints)
3495 FX_DWORD req_pos = (FX_DWORD)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0);
3496 FX_DWORD dwSize = (FX_DWORD)(m_dwFileLen - req_pos);
3497 if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) {
// dwSize is at most 1024, so the read fits the buffer.
3498 FX_BYTE buffer[1024];
3499 m_pFileRead->ReadBlock(buffer, req_pos, dwSize);
3500 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(buffer, (size_t)dwSize, FALSE));
3501 m_syntaxParser.InitParser((IFX_FileStream*)file, 0);
// Start at the last byte and search towards the beginning.
3502 m_syntaxParser.RestorePos(dwSize - 1);
3503 if (m_syntaxParser.SearchWord(FX_BSTRC("startxref"), TRUE, FALSE, dwSize)) {
// The first word read is discarded (presumably the keyword itself --
// confirm); the second one is the offset.
3505 m_syntaxParser.GetNextWord(bNumber);
3506 CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(bNumber);
3508 m_docStatus = PDF_DATAAVAIL_ERROR;
3511 m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
// A zero offset or one beyond the end of the file cannot be followed.
3512 if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) {
3513 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3516 m_dwLastXRefOffset = m_dwXRefOffset;
3517 SetStartOffset(m_dwXRefOffset);
3518 m_docStatus = PDF_DATAAVAIL_CROSSREF;
3521 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3525 pHints->AddSegment(req_pos, dwSize);
// Tries to parse the object at m_dwCurrentXRefSteam as a cross-reference
// stream. When the object's /Type is the name XRef, xref_offset receives its
// /Prev value. If the next 512 bytes at m_Pos are not available yet, they
// are requested through pHints.
// NOTE(review): the return type is FX_DWORD, yet CheckAllCrossRefStream()
// compares the result with -1.
3528 FX_DWORD CPDF_DataAvail::CheckCrossRefStream(IFX_DownloadHints* pHints, FX_FILESIZE &xref_offset)
// Up to 512 bytes from the current position, clamped to the end of file.
3531 FX_DWORD req_size = (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3532 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {
// Re-read everything from the start of the stream object up to the end of
// the block that was just confirmed available.
3533 FX_INT32 iSize = (FX_INT32)(m_Pos + req_size - m_dwCurrentXRefSteam);
3534 CFX_BinaryBuf buf(iSize);
3535 FX_LPBYTE pBuf = buf.GetBuffer();
3536 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);
3537 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
3538 m_parser.m_Syntax.InitParser((IFX_FileStream*)file, 0);
3539 FX_BOOL bNumber = FALSE;
3540 FX_FILESIZE dwSavePos = m_parser.m_Syntax.SavePos();
3541 CFX_ByteString objnum = m_parser.m_Syntax.GetNextWord(bNumber);
3545 FX_DWORD objNum = FXSYS_atoi(objnum);
3546 CPDF_Object *pObj = m_parser.ParseIndirectObjectAt(NULL, 0, objNum, NULL);
// Advance m_Pos by the amount the parser consumed.
3548 m_Pos += m_parser.m_Syntax.SavePos();
// NOTE(review): pObj->GetDict() is dereferenced with no NULL check visible
// here.
3551 CPDF_Object *pName = pObj->GetDict()->GetElement(FX_BSTRC("Type"));
3552 if (pName && pName->GetType() == PDFOBJ_NAME) {
3553 if (pName->GetString() == FX_BSTRC("XRef")) {
3554 m_Pos += m_parser.m_Syntax.SavePos();
3555 xref_offset = pObj->GetDict()->GetInteger(FX_BSTRC("Prev"));
3566 pHints->AddSegment(m_Pos, req_size);
// Callers in this file pass a cross-reference offset (m_dwXRefOffset or
// m_dwPrevXRefOffset) before switching to a state that reads tokens;
// presumably this sets the current read position -- confirm against the body.
3569 inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset)
3573 #define MAX_WORD_BUFFER 256
// Reads the next token from the file one byte at a time through
// GetNextChar(), classifying each byte with the _PDF_CharType table and
// collecting the token text in m_WordBuffer / m_WordSize. Several branches
// build a CFX_ByteString from the buffer when a token is complete. A failing
// GetNextChar() ends the scan.
3574 FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString &token)
3578 if (!GetNextChar(ch)) {
3581 FX_BYTE type = _PDF_CharType[ch];
// Skip bytes of class 'W' (presumably whitespace -- confirm against the
// table definition).
3583 while (type == 'W') {
3584 if (!GetNextChar(ch)) {
3587 type = _PDF_CharType[ch];
// Consume bytes up to an end-of-line character (presumably skipping a
// comment -- confirm).
3593 if (!GetNextChar(ch)) {
3596 if (ch == '\r' || ch == '\n') {
3600 type = _PDF_CharType[ch];
// Store the first byte of the token. m_WordSize is not compared with
// MAX_WORD_BUFFER on this store.
3603 m_WordBuffer[m_WordSize++] = ch;
// Keep appending while the following bytes are of class 'R' or 'N'; any
// other class ends the token.
3606 if (!GetNextChar(ch)) {
3609 type = _PDF_CharType[ch];
3610 if (type != 'R' && type != 'N') {
3612 CFX_ByteString ret(m_WordBuffer, m_WordSize);
// Bytes beyond MAX_WORD_BUFFER are dropped rather than stored.
3616 if (m_WordSize < MAX_WORD_BUFFER) {
3617 m_WordBuffer[m_WordSize++] = ch;
// Look at the byte following '<' (presumably to recognise "<<").
3620 } else if (ch == '<') {
3621 if (!GetNextChar(ch)) {
3625 m_WordBuffer[m_WordSize++] = ch;
// Same for '>' (presumably ">>").
3629 } else if (ch == '>') {
3630 if (!GetNextChar(ch)) {
3634 m_WordBuffer[m_WordSize++] = ch;
3639 CFX_ByteString ret(m_WordBuffer, m_WordSize);
// General token: collect bytes until one of class 'D' or 'W' is read.
3644 if (m_WordSize < MAX_WORD_BUFFER) {
3645 m_WordBuffer[m_WordSize++] = ch;
3647 if (!GetNextChar(ch)) {
3650 type = _PDF_CharType[ch];
3651 if (type == 'D' || type == 'W') {
3656 CFX_ByteString ret(m_WordBuffer, m_WordSize);
// Delivers the byte at m_Pos through ch, using the block cached in
// m_bufferData (covering [m_bufferOffset, m_bufferOffset + m_bufferSize)).
// The cache is refilled in blocks of up to 512 bytes when m_Pos falls outside
// it. Positions at or beyond m_dwFileLen and failed reads are rejected.
3660 FX_BOOL CPDF_DataAvail::GetNextChar(FX_BYTE &ch)
3662 FX_FILESIZE pos = m_Pos;
3663 if (pos >= m_dwFileLen) {
// Cache miss test. Because of ">=", a position exactly at m_bufferOffset
// also triggers a refill.
3666 if (m_bufferOffset >= pos || (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) {
3667 FX_FILESIZE read_pos = pos;
3668 FX_DWORD read_size = 512;
// Files shorter than one block are read whole.
3669 if ((FX_FILESIZE)read_size > m_dwFileLen) {
3670 read_size = (FX_DWORD)m_dwFileLen;
// Near the end of the file, slide the block back so that it ends exactly at
// the end of the file.
3672 if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen) {
3673 read_pos = m_dwFileLen - read_size;
3675 if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size)) {
3678 m_bufferOffset = read_pos;
3679 m_bufferSize = read_size;
3681 ch = m_bufferData[pos - m_bufferOffset];
// Reads tokens of a cross-reference section until the "trailer" keyword is
// found, then records the trailer position and moves to
// PDF_DATAAVAIL_TRAILER. When a token cannot be read, the next block at m_Pos
// is requested through pHints.
3685 FX_BOOL CPDF_DataAvail::CheckCrossRefItem(IFX_DownloadHints *pHints)
3688 CFX_ByteString token;
3690 if (!GetNextToken(token)) {
// Up to 512 bytes from the current position, clamped to the end of file.
3691 iSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3692 pHints->AddSegment(m_Pos, iSize);
3695 if (token == "trailer") {
3696 m_dwTrailerOffset = m_Pos;
3697 m_docStatus = PDF_DATAAVAIL_TRAILER;
// Follows the chain of cross-reference streams: each successful
// CheckCrossRefStream() call yields the offset of the previous stream, which
// becomes the next position to examine. The chain ends in
// PDF_DATAAVAIL_LOADALLCRSOSSREF; a result of -1 sets PDF_DATAAVAIL_ERROR.
3702 FX_BOOL CPDF_DataAvail::CheckAllCrossRefStream(IFX_DownloadHints *pHints)
3704 FX_FILESIZE xref_offset = 0;
3705 FX_DWORD dwRet = CheckCrossRefStream(pHints, xref_offset);
3708 m_docStatus = PDF_DATAAVAIL_LOADALLCRSOSSREF;
// Continue with the previous stream in the chain.
3710 m_dwCurrentXRefSteam = xref_offset;
3711 m_Pos = xref_offset;
// NOTE(review): dwRet is unsigned; the comparison relies on -1 converting
// to the maximum FX_DWORD value.
3714 } else if (dwRet == -1) {
3715 m_docStatus = PDF_DATAAVAIL_ERROR;
// Examines the token at the current cross-reference offset. For a classic
// "xref" table the offset is recorded in m_CrossOffset and tokens are read
// until "trailer" is found (-> PDF_DATAAVAIL_TRAILER). Other outcomes either
// request more data through pHints or fall back to
// PDF_DATAAVAIL_LOADALLFILE.
3719 FX_BOOL CPDF_DataAvail::CheckCrossRef(IFX_DownloadHints* pHints)
3721 FX_FILESIZE dwSavePos = m_Pos;
3723 CFX_ByteString token;
3724 if (!GetNextToken(token)) {
// Up to 512 bytes from the current position, clamped to the end of file.
3725 iSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3726 pHints->AddSegment(m_Pos, iSize);
3729 if (token == "xref") {
// Newest-first: each section is inserted at the front of the list.
3730 m_CrossOffset.InsertAt(0, m_dwXRefOffset);
3732 if (!GetNextToken(token)) {
3733 iSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3734 pHints->AddSegment(m_Pos, iSize);
// Resume the scan in CheckCrossRefItem() once more data has arrived.
3735 m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM;
3738 if (token == "trailer") {
3739 m_dwTrailerOffset = m_Pos;
3740 m_docStatus = PDF_DATAAVAIL_TRAILER;
3745 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
// Runs after a trailer has been parsed: makes sure the bytes following it are
// available, then either continues with the previous cross-reference section
// (m_dwPrevXRefOffset != 0 -> PDF_DATAAVAIL_CROSSREF) or finishes the chain
// (-> PDF_DATAAVAIL_LOADALLCRSOSSREF).
3750 FX_BOOL CPDF_DataAvail::CheckTrailerAppend(IFX_DownloadHints* pHints)
3752 if (m_Pos < m_dwFileLen) {
// Position just past what m_syntaxParser consumed.
3753 FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos();
// Up to 512 bytes from there, clamped to the end of file.
3754 FX_INT32 iSize = (FX_INT32)(dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512);
3755 if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) {
3756 pHints->AddSegment(dwAppendPos, iSize);
3760 if (m_dwPrevXRefOffset) {
3761 SetStartOffset(m_dwPrevXRefOffset);
3762 m_docStatus = PDF_DATAAVAIL_CROSSREF;
3764 m_docStatus = PDF_DATAAVAIL_LOADALLCRSOSSREF;
// Parses the trailer dictionary that starts at m_dwTrailerOffset and decides
// how to continue:
//   - an /Encrypt reference or a non-zero /XRefStm falls back to
//     PDF_DATAAVAIL_LOADALLFILE,
//   - a /Prev offset inside the file is followed via
//     PDF_DATAAVAIL_TRAILER_APPEND,
//   - a /Prev offset at or beyond the end of file falls back to
//     PDF_DATAAVAIL_LOADALLFILE.
// If the next 512 bytes at m_Pos are not available, they are requested
// through pHints.
3768 FX_BOOL CPDF_DataAvail::CheckTrailer(IFX_DownloadHints* pHints)
// Up to 512 bytes from the current position, clamped to the end of file.
3770 FX_INT32 iTrailerSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3771 if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) {
// Read everything from the trailer keyword up to the end of the block that
// was just confirmed available.
3772 FX_INT32 iSize = (FX_INT32)(m_Pos + iTrailerSize - m_dwTrailerOffset);
3773 CFX_BinaryBuf buf(iSize);
3774 FX_LPBYTE pBuf = buf.GetBuffer();
3776 m_docStatus = PDF_DATAAVAIL_ERROR;
3779 if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize)) {
3782 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
3783 m_syntaxParser.InitParser((IFX_FileStream*)file, 0);
3784 CPDF_Object *pTrailer = m_syntaxParser.GetObject(NULL, 0, 0, 0);
// Advance past what was consumed and ask for the next block (presumably the
// path taken when the trailer could not be parsed yet -- confirm).
3786 m_Pos += m_syntaxParser.SavePos();
3787 pHints->AddSegment(m_Pos, iTrailerSize);
3790 CPDF_Dictionary *pTrailerDict = pTrailer->GetDict();
// An indirectly referenced /Encrypt dictionary is not handled incrementally.
3792 CPDF_Object *pEncrypt = pTrailerDict->GetElement("Encrypt");
3793 if (pEncrypt && pEncrypt->GetType() == PDFOBJ_REFERENCE) {
3794 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3795 pTrailer->Release();
3799 FX_DWORD xrefpos = GetDirectInteger(pTrailer->GetDict(), FX_BSTRC("Prev"));
// m_dwPrevXRefOffset temporarily holds /XRefStm; a non-zero value means a
// cross-reference stream is present, which is not followed here.
3801 m_dwPrevXRefOffset = GetDirectInteger(pTrailer->GetDict(), FX_BSTRC("XRefStm"));
3802 pTrailer->Release();
3803 if (m_dwPrevXRefOffset) {
3804 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3806 m_dwPrevXRefOffset = xrefpos;
// A /Prev offset outside the file cannot be followed.
3807 if (m_dwPrevXRefOffset >= m_dwFileLen) {
3808 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3810 SetStartOffset(m_dwPrevXRefOffset);
3811 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
// Alternative path: no previous section to follow.
3816 m_dwPrevXRefOffset = 0;
3817 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
3818 pTrailer->Release();
3822 pHints->AddSegment(m_Pos, iTrailerSize);
// Per-page variant of the page check: dispatches on m_docStatus to
// LoadDocPages() (PAGETREE), LoadDocPage() for page iPage (PAGE) or
// LoadAllFile() (ERROR). The remaining branch marks the page tree, the pages
// and the current page dictionary as loaded and resets the state to
// PDF_DATAAVAIL_PAGE.
3825 FX_BOOL CPDF_DataAvail::CheckPage(FX_INT32 iPage, IFX_DownloadHints* pHints)
3828 switch (m_docStatus) {
3829 case PDF_DATAAVAIL_PAGETREE:
3830 if (!LoadDocPages(pHints)) {
3834 case PDF_DATAAVAIL_PAGE:
3835 if (!LoadDocPage(iPage, pHints)) {
3839 case PDF_DATAAVAIL_ERROR:
3840 return LoadAllFile(pHints);
3842 m_bPagesTreeLoad = TRUE;
3843 m_bPagesLoad = TRUE;
// IsPageAvail() skips this check while m_bCurPageDictLoadOK is set.
3844 m_bCurPageDictLoadOK = TRUE;
3845 m_docStatus = PDF_DATAAVAIL_PAGE;
// Expands a page-tree node whose object (dwPageNo) is an array of kid
// references: the node becomes a PDF_PAGENODE_PAGES node and one child
// CPDF_PageNode is created per reference element.
3850 FX_BOOL CPDF_DataAvail::CheckArrayPageNode(FX_DWORD dwPageNo, CPDF_PageNode *pPageNode, IFX_DownloadHints* pHints)
3852 FX_BOOL bExist = FALSE;
3853 CPDF_Object *pPages = GetObject(dwPageNo, pHints, &bExist);
3855 m_docStatus = PDF_DATAAVAIL_ERROR;
// NOTE(review): this assignment stores the value the condition has just
// tested for, so it has no effect.
3859 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3860 m_docStatus = PDF_DATAAVAIL_ERROR;
// Only arrays are accepted here.
3865 if (pPages->GetType() != PDFOBJ_ARRAY) {
3867 m_docStatus = PDF_DATAAVAIL_ERROR;
3870 pPageNode->m_type = PDF_PAGENODE_PAGES;
3871 CPDF_Array* pArray = (CPDF_Array*)pPages;
3872 for (FX_DWORD i = 0; i < pArray->GetCount(); ++i) {
3873 CPDF_Object *pKid = (CPDF_Object *)pArray->GetElement(i);
// Elements that are not references are filtered out before a child node is
// created.
3874 if (!pKid || pKid->GetType() != PDFOBJ_REFERENCE) {
// The child node is owned by pPageNode; ~CPDF_PageNode walks m_childNode.
3877 CPDF_PageNode *pNode = FX_NEW CPDF_PageNode();
3878 pPageNode->m_childNode.Add(pNode);
3879 pNode->m_dwPageNo = ((CPDF_Reference*)pKid)->GetRefObjNum();
// Classifies a page-tree node whose kind is not known yet by fetching its
// object (dwPageNo):
//   - array                    -> PDF_PAGENODE_ARRAY,
//   - dictionary /Type /Pages  -> PDF_PAGENODE_PAGES, with one child node per
//                                 /Kids reference,
//   - dictionary /Type /Page   -> PDF_PAGENODE_PAGE,
//   - anything else            -> PDF_DATAAVAIL_ERROR.
3884 FX_BOOL CPDF_DataAvail::CheckUnkownPageNode(FX_DWORD dwPageNo, CPDF_PageNode *pPageNode, IFX_DownloadHints* pHints)
3886 FX_BOOL bExist = FALSE;
3887 CPDF_Object *pPage = GetObject(dwPageNo, pHints, &bExist);
3889 m_docStatus = PDF_DATAAVAIL_ERROR;
// NOTE(review): this assignment stores the value the condition has just
// tested for, so it has no effect.
3893 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3894 m_docStatus = PDF_DATAAVAIL_ERROR;
// Array object: expansion is left to CheckArrayPageNode().
3899 if (pPage->GetType() == PDFOBJ_ARRAY) {
3900 pPageNode->m_dwPageNo = dwPageNo;
3901 pPageNode->m_type = PDF_PAGENODE_ARRAY;
3905 if (pPage->GetType() != PDFOBJ_DICTIONARY) {
3907 m_docStatus = PDF_DATAAVAIL_ERROR;
3910 pPageNode->m_dwPageNo = dwPageNo;
3911 CFX_ByteString type = pPage->GetDict()->GetString(FX_BSTRC("Type"));
3912 if (type == FX_BSTRC("Pages")) {
3913 pPageNode->m_type = PDF_PAGENODE_PAGES;
3914 CPDF_Object *pKids = pPage->GetDict()->GetElement(FX_BSTRC("Kids"));
// Path for a /Pages node without /Kids (presumably -- confirm the guard).
3916 m_docStatus = PDF_DATAAVAIL_PAGE;
3919 switch (pKids->GetType()) {
// A single kid given as a direct reference.
3920 case PDFOBJ_REFERENCE: {
3921 CPDF_Reference *pKid = (CPDF_Reference *)pKids;
3922 CPDF_PageNode *pNode = FX_NEW CPDF_PageNode();
3923 pPageNode->m_childNode.Add(pNode);
3924 pNode->m_dwPageNo = pKid->GetRefObjNum();
// The usual case: an array of kid references; other element types are
// filtered out.
3927 case PDFOBJ_ARRAY: {
3928 CPDF_Array *pKidsArray = (CPDF_Array *)pKids;
3929 for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
3930 CPDF_Object *pKid = (CPDF_Object *)pKidsArray->GetElement(i);
3931 if (!pKid || pKid->GetType() != PDFOBJ_REFERENCE) {
3934 CPDF_PageNode *pNode = FX_NEW CPDF_PageNode();
3935 pPageNode->m_childNode.Add(pNode);
3936 pNode->m_dwPageNo = ((CPDF_Reference*)pKid)->GetRefObjNum();
3943 } else if (type == FX_BSTRC("Page")) {
3944 pPageNode->m_type = PDF_PAGENODE_PAGE;
3947 m_docStatus = PDF_DATAAVAIL_ERROR;
// Walks the children of pageNodes looking for page number iPage. iCount is
// the running page counter shared across the recursion. Nodes of unknown or
// array kind are resolved on the way, and PAGES nodes are descended into
// recursively. When the page is found, its object number is stored in the
// document's page list and the state becomes PDF_DATAAVAIL_DONE.
// NOTE(review): no recursion depth limit is visible here.
3953 FX_BOOL CPDF_DataAvail::CheckPageNode(CPDF_PageNode &pageNodes, FX_INT32 iPage, FX_INT32 &iCount, IFX_DownloadHints* pHints)
3955 FX_INT32 iSize = pageNodes.m_childNode.GetSize();
3957 m_docStatus = PDF_DATAAVAIL_ERROR;
3960 for (FX_INT32 i = 0; i < iSize; ++i) {
3961 CPDF_PageNode *pNode = (CPDF_PageNode*)pageNodes.m_childNode.GetAt(i);
3965 switch (pNode->m_type) {
3966 case PDF_PAGENODE_UNKOWN:
3967 if (!CheckUnkownPageNode(pNode->m_dwPageNo, pNode, pHints)) {
3972 case PDF_PAGENODE_PAGE:
// Record the object number of the requested page in the document.
3974 if (iPage == iCount && m_pDocument) {
3975 m_pDocument->m_PageList.SetAt(iPage, pNode->m_dwPageNo);
3978 case PDF_PAGENODE_PAGES:
3979 if (!CheckPageNode(*pNode, iPage, iCount, pHints)) {
3983 case PDF_PAGENODE_ARRAY:
3984 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints)) {
3990 if (iPage == iCount) {
3991 m_docStatus = PDF_DATAAVAIL_DONE;
// Resolves the object number of page iPage by walking m_pageNodes. Nothing is
// done when iPage is out of range or the document's page list already has an
// entry for it.
3997 FX_BOOL CPDF_DataAvail::LoadDocPage(FX_INT32 iPage, IFX_DownloadHints* pHints)
3999 if (m_pDocument->GetPageCount() <= iPage || m_pDocument->m_PageList.GetAt(iPage)) {
4000 m_docStatus = PDF_DATAAVAIL_DONE;
// Root node is itself a single page: the two outcomes below are selected by
// a condition on the line in between (presumably iPage == 0 -- confirm).
4003 if (m_pageNodes.m_type == PDF_PAGENODE_PAGE) {
4005 m_docStatus = PDF_DATAAVAIL_DONE;
4008 m_docStatus = PDF_DATAAVAIL_ERROR;
// The counter starts at -1 (presumably incremented before each comparison
// in CheckPageNode -- confirm).
4011 FX_INT32 iCount = -1;
4012 return CheckPageNode(m_pageNodes, iPage, iCount, pHints);
// Fetches the root page-tree node (m_PagesObjNum) and inspects its dictionary:
// the presence of /Kids and the value of /Count.
4014 FX_BOOL CPDF_DataAvail::CheckPageCount(IFX_DownloadHints* pHints)
4016 FX_BOOL bExist = FALSE;
4017 CPDF_Object *pPages = GetObject(m_PagesObjNum, pHints, &bExist);
4019 m_docStatus = PDF_DATAAVAIL_ERROR;
4025 CPDF_Dictionary* pPagesDict = pPages->GetDict();
// Error path (presumably when the node has no dictionary -- confirm).
4028 m_docStatus = PDF_DATAAVAIL_ERROR;
4031 if (!pPagesDict->KeyExist(FX_BSTRC("Kids"))) {
4035 int count = pPagesDict->GetInteger(FX_BSTRC("Count"));
// Classifies the root page-tree node into m_pageNodes and then uses
// CheckPageCount() to decide between moving to PDF_DATAAVAIL_PAGE and setting
// m_bTotalLoadPageTree.
4043 FX_BOOL CPDF_DataAvail::LoadDocPages(IFX_DownloadHints* pHints)
4045 if (!CheckUnkownPageNode(m_PagesObjNum, &m_pageNodes, pHints)) {
4048 if (CheckPageCount(pHints)) {
4049 m_docStatus = PDF_DATAAVAIL_PAGE;
// CheckDocStatus() uses CheckPages()/CheckPage(pHints) while this flag is
// set.
4052 m_bTotalLoadPageTree = TRUE;
// Runs CheckPageStatus() until the page tree is marked as loaded
// (m_bPagesTreeLoad), then lets the document load its pages.
4056 FX_BOOL CPDF_DataAvail::LoadPages(IFX_DownloadHints* pHints)
4058 while (!m_bPagesTreeLoad) {
4059 if (!CheckPageStatus(pHints)) {
4066 m_pDocument->LoadPages();
// Linearized documents: makes sure the region from m_dwLastXRefOffset to the
// end of the file is available and, once it is, has the document's parser
// load the main cross-reference table (attempted only once, guarded by
// m_bMainXRefLoad). m_bLinearedDataOK records that this step is finished.
4069 FX_BOOL CPDF_DataAvail::CheckLinearizedData(IFX_DownloadHints* pHints)
4071 if (m_bLinearedDataOK) {
4074 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset))) {
4075 pHints->AddSegment(m_dwLastXRefOffset, (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset));
4079 if (!m_bMainXRefLoad) {
4080 dwRet = ((CPDF_Parser *)m_pDocument->GetParser())->LoadLinearizedMainXRefTable();
4081 if (dwRet == PDFPARSE_ERROR_SUCCESS) {
4082 if (!PreparePageItem()) {
// Set only after the table loaded and PreparePageItem() passed;
// IsPageAvail() branches on this flag.
4085 m_bMainXRefLoadedOK = TRUE;
// Set regardless of the outcome, so the load is not attempted again.
4087 m_bMainXRefLoad = TRUE;
4089 m_bLinearedDataOK = TRUE;
// Checks the availability of the objects reachable from the /Annots entry of
// page iPage. m_objs_array carries the still-pending objects from one call to
// the next.
4092 FX_BOOL CPDF_DataAvail::CheckPageAnnots(FX_INT32 iPage, IFX_DownloadHints* pHints)
// Nothing pending: start a fresh walk from the page's /Annots entry.
4094 if (!m_objs_array.GetSize()) {
4095 m_objs_array.RemoveAll();
4096 m_objnum_array.RemoveAll();
4097 CPDF_Dictionary *pPageDict = m_pDocument->GetPage(iPage);
4101 CPDF_Object *pAnnots = pPageDict->GetElement(FX_BSTRC("Annots"));
4105 CFX_PtrArray obj_array;
4106 obj_array.Add(pAnnots);
4107 FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
4109 m_objs_array.RemoveAll();
// Objects are pending from an earlier call: re-check them and keep whatever
// the check reports in new_objs_array as the new pending list.
4113 CFX_PtrArray new_objs_array;
4114 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4115 m_objs_array.RemoveAll();
4117 m_objs_array.Append(new_objs_array);
// First page of a linearized document: checks the page's annotations once
// (guarded by m_bAnnotsLoad), then the linearized main cross-reference data,
// and finally clears m_bPageLoadedOK.
4122 FX_BOOL CPDF_DataAvail::CheckLinearizedFirstPage(FX_INT32 iPage, IFX_DownloadHints* pHints)
4124 if (!m_bAnnotsLoad) {
4125 if (!CheckPageAnnots(iPage, pHints)) {
4128 m_bAnnotsLoad = TRUE;
4131 if (!CheckLinearizedData(pHints)) {
4134 m_bPageLoadedOK = FALSE;
4137 FX_BOOL CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary *pDict)
4139 CPDF_Object *pParent = pDict->GetElement("Parent");
4143 CPDF_Dictionary *pParentDict = pParent->GetDict();
4147 CPDF_Object *pRet = pParentDict->GetElement("Resource");
4149 m_pPageResource = pRet;
4152 return HaveResourceAncestor(pParentDict);
// Public entry point: reports whether everything needed for page iPage is
// available, requesting missing byte ranges through pHints.
//
// Visible stages:
//   1. on the first check of a page, reset the per-page progress flags;
//   2. pages already recorded in m_pagesLoadState are not processed again;
//   3. linearized files: the first page is recorded as loaded in
//      m_pagesLoadState; for the other pages the main cross-reference data is
//      loaded first, and a failure there is followed by loading the whole
//      file and rebuilding the cross-reference table;
//   4. non-linearized files: locate the page dictionary;
//   5. AcroForm sub-objects (once), the page's own object graph, its
//      annotations and inherited resources;
//   6. reset the flags and record the page in m_pagesLoadState.
4155 FX_BOOL CPDF_DataAvail::IsPageAvail(FX_INT32 iPage, IFX_DownloadHints* pHints)
// Stage 1: fresh page, forget progress made for a previously checked page.
4160 if (IsFirstCheck(iPage)) {
4161 m_bCurPageDictLoadOK = FALSE;
4162 m_bPageLoadedOK = FALSE;
4163 m_bAnnotsLoad = FALSE;
4164 m_bNeedDownLoadResource = FALSE;
4165 m_objs_array.RemoveAll();
4166 m_objnum_array.RemoveAll();
// Stage 2: per-page "fully loaded" map, created on first use.
4168 if (m_pagesLoadState == NULL) {
4169 m_pagesLoadState = FX_NEW CFX_CMapDWordToDWord();
4171 FX_DWORD dwPageLoad = 0;
4172 if (m_pagesLoadState->Lookup(iPage, dwPageLoad) && dwPageLoad != 0) {
// Stage 3: linearized documents.
4175 if (m_bLinearized) {
4176 if ((FX_DWORD)iPage == m_dwFirstPageNo) {
4177 m_pagesLoadState->SetAt(iPage, TRUE);
4180 if (!CheckLinearizedData(pHints)) {
4183 if (m_bMainXRefLoadedOK) {
4184 if (m_bTotalLoadPageTree) {
4185 if (!LoadPages(pHints)) {
4189 if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) {
// Main cross-reference table could not be used: fetch the whole file and
// rebuild the table from it.
4194 if (!LoadAllFile(pHints)) {
4197 ((CPDF_Parser *)m_pDocument->GetParser())->RebuildCrossRef();
4198 ResetFirstCheck(iPage);
// Stage 4: non-linearized documents without a fully loaded page tree.
4202 if (!m_bTotalLoadPageTree) {
4203 if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) {
// Stage 5a: AcroForm sub-objects, checked once per instance.
4208 if (m_bHaveAcroForm && !m_bAcroFormLoad) {
4209 if (!CheckAcroFormSubObject(pHints)) {
4212 m_bAcroFormLoad = TRUE;
// Stage 5b: the page's own object graph. m_objs_array holds the objects
// that are still pending between calls.
4214 if (!m_bPageLoadedOK) {
4215 if (!m_objs_array.GetSize()) {
4216 m_objs_array.RemoveAll();
4217 m_objnum_array.RemoveAll();
4218 m_pPageDict = m_pDocument->GetPage(iPage);
4220 ResetFirstCheck(iPage);
4223 CFX_PtrArray obj_array;
4224 obj_array.Add(m_pPageDict);
4225 FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
4227 m_objs_array.RemoveAll();
4228 m_bPageLoadedOK = TRUE;
4233 CFX_PtrArray new_objs_array;
4234 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4235 m_objs_array.RemoveAll();
4237 m_bPageLoadedOK = TRUE;
4239 m_objs_array.Append(new_objs_array);
// Stage 5c: annotations, checked once per page.
4244 if (m_bPageLoadedOK) {
4245 if (!m_bAnnotsLoad) {
4246 if (!CheckPageAnnots(iPage, pHints)) {
4249 m_bAnnotsLoad = TRUE;
// Stage 5d: inherited resources.
// NOTE(review): the key looked up is "Resource" (singular); the PDF page
// attribute is named /Resources -- confirm.
4252 if (m_pPageDict && !m_bNeedDownLoadResource) {
4253 CPDF_Object *pRes = m_pPageDict->GetElement("Resource");
4255 m_bNeedDownLoadResource = HaveResourceAncestor(m_pPageDict);
4257 m_bNeedDownLoadResource = FALSE;
4259 if (m_bNeedDownLoadResource) {
4260 FX_BOOL bRet = CheckResources(pHints);
4264 m_bNeedDownLoadResource = FALSE;
// Stage 6: page complete. Reset the progress flags and record the page as
// loaded.
4266 m_bPageLoadedOK = FALSE;
4267 m_bAnnotsLoad = FALSE;
4268 m_bCurPageDictLoadOK = FALSE;
4269 ResetFirstCheck(iPage);
4270 m_pagesLoadState->SetAt(iPage, TRUE);
// Checks the availability of the objects reachable from m_pPageResource (the
// inherited resource entry found by HaveResourceAncestor()). m_objs_array
// carries the still-pending objects from one call to the next.
4273 FX_BOOL CPDF_DataAvail::CheckResources(IFX_DownloadHints* pHints)
// Nothing pending: start a fresh walk from the resource object.
4275 if (!m_objs_array.GetSize()) {
4276 m_objs_array.RemoveAll();
4277 CFX_PtrArray obj_array;
4278 obj_array.Add(m_pPageResource);
4279 FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
4281 m_objs_array.RemoveAll();
// Objects are pending from an earlier call: re-check them and keep whatever
// the check reports in new_objs_array as the new pending list.
4285 CFX_PtrArray new_objs_array;
4286 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4287 m_objs_array.RemoveAll();
4289 m_objs_array.Append(new_objs_array);
// Reports the byte range of the linearized main cross-reference region:
// *pPos receives m_dwLastXRefOffset and *pSize the distance from there to
// the end of the file.
4294 void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE *pPos, FX_DWORD *pSize)
4297 *pPos = m_dwLastXRefOffset;
4300 *pSize = (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset);
// Reports whether the document's AcroForm data is available: PDFFORM_AVAIL,
// PDFFORM_NOTAVAIL (more data has been requested through pHints) or
// PDFFORM_NOTEXIST (the root dictionary has no /AcroForm entry).
4303 FX_INT32 CPDF_DataAvail::IsFormAvail(IFX_DownloadHints *pHints)
4306 return PDFFORM_AVAIL;
// One-time setup: locate the AcroForm and seed the pending-object list.
4308 if (!m_bLinearizedFormParamLoad) {
4309 CPDF_Dictionary *pRoot = m_pDocument->GetRoot();
4311 return PDFFORM_AVAIL;
4313 CPDF_Object *pAcroForm = pRoot->GetElement(FX_BSTRC("AcroForm"));
4315 return PDFFORM_NOTEXIST;
// The main cross-reference table must be loaded before the form objects can
// be resolved.
4317 if (!m_bMainXRefLoad && !CheckLinearizedData(pHints)) {
4318 return PDFFORM_NOTAVAIL;
4320 if (!m_objs_array.GetSize()) {
4321 m_objs_array.Add(pAcroForm->GetDict());
4323 m_bLinearizedFormParamLoad = TRUE;
// Re-check the pending objects; whatever the check reports in
// new_objs_array becomes the new pending list.
4325 CFX_PtrArray new_objs_array;
4326 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4327 m_objs_array.RemoveAll();
4329 m_objs_array.Append(new_objs_array);
4330 return PDFFORM_NOTAVAIL;
4332 return PDFFORM_AVAIL;
// Inserts dwObjNum into m_number_array at the position reported by
// BinarySearch(), which keeps the array sorted. A value that is already
// present is handled in the branch below (presumably by returning without
// inserting -- confirm).
4334 void CPDF_SortObjNumArray::AddObjNum(FX_DWORD dwObjNum)
4337 if (BinarySearch(dwObjNum, iNext)) {
4340 m_number_array.InsertAt(iNext, dwObjNum);
// Returns whether dwObjNum is present in the array; the position reported by
// BinarySearch() is not used.
4342 FX_BOOL CPDF_SortObjNumArray::Find(FX_DWORD dwObjNum)
4345 return BinarySearch(dwObjNum, iNext);
4347 FX_BOOL CPDF_SortObjNumArray::BinarySearch(FX_DWORD value, FX_INT32 &iNext)
4349 FX_INT32 iLen = m_number_array.GetSize();
4351 FX_INT32 iHigh = iLen - 1;
4353 while (iLow <= iHigh) {
4354 iMid = (iLow + iHigh) / 2;
4355 FX_DWORD tt = m_number_array.GetAt(iMid);
4356 if (m_number_array.GetAt(iMid) == value) {
4359 } else if (m_number_array.GetAt(iMid) > value) {
4361 } else if (m_number_array.GetAt(iMid) < value) {
// Destructor: visits every child node stored in m_childNode (presumably
// deleting each one -- confirm) and then empties the array.
4368 CPDF_PageNode::~CPDF_PageNode()
4370 FX_INT32 iSize = m_childNode.GetSize();
4371 for (FX_INT32 i = 0; i < iSize; ++i) {
4372 CPDF_PageNode *pNode = (CPDF_PageNode*)m_childNode[i];
4377 m_childNode.RemoveAll();