1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
7 #include "../../../include/fpdfapi/fpdf_page.h"
8 #include "../../../include/fpdfapi/fpdf_module.h"
10 #if defined(_FPDFAPI_MINI_)
11 extern const FX_LPCSTR _PDF_CharType;
// Tokenizer entry point: loops i over [0, src_size) and drives the word,
// string and inline-image state machines seen below.
// NOTE(review): this listing is a sparse excerpt of the function (braces, case
// labels and several statements are not visible), so the comments describe only
// the statements that can be seen here. The declaration of ch is not visible;
// presumably it is src_buf[i] -- TODO confirm.
12 void CPDF_StreamContentParser::InputData(FX_LPCBYTE src_buf, FX_DWORD src_size)
// Guard comparing m_Level with _FPDF_MAX_FORM_LEVEL_; the branch body is not visible.
14 if (m_Level > _FPDF_MAX_FORM_LEVEL_) {
17 for (FX_DWORD i = 0; i < src_size; i ++) {
// Character class from the _PDF_CharType table; the visible code compares it
// with the class letters W, D, N and R.
19 int type = _PDF_CharType[ch];
// Dispatch on the current word-scanning state.
21 switch (m_WordState) {
24 } else if (type == 'N') {
28 } else if (type == 'R') {
61 if (ch == '\n' || ch == '\r') {
66 if (type != 'R' && type != 'N') {
// A byte is stored in m_pWordBuf only while m_WordSize is below 256.
71 if (m_WordSize < 256) {
72 m_pWordBuf[m_WordSize++] = ch;
86 if (type != 'R' && type != 'N') {
94 if (m_WordSize < 256) {
95 m_pWordBuf[m_WordSize++] = ch;
104 if (m_WordSize < 256) {
105 m_pWordBuf[m_WordSize++] = ch;
113 m_StringBuf.AppendByte(ch);
// String handling: dispatch on m_StringState.
117 switch (m_StringState) {
// m_StringLevel is tested against 0 (presumably parenthesis nesting depth --
// TODO confirm); parentheses seen here are appended to m_StringBuf as-is.
121 if (m_StringLevel == 0) {
126 m_StringBuf.AppendByte(')');
127 } else if (ch == '(') {
129 m_StringBuf.AppendByte('(');
130 } else if (ch == '\\') {
133 m_StringBuf.AppendByte((char)ch);
// Escape handling: an octal digit seeds m_EscCode; the letters r, t, b, f
// append the matching control byte; backslash and parentheses append themselves.
137 if (ch >= '0' && ch <= '7') {
138 m_EscCode = ch - '0';
143 m_StringBuf.AppendByte('\n');
144 } else if (ch == 'r') {
145 m_StringBuf.AppendByte('\r');
146 } else if (ch == 't') {
147 m_StringBuf.AppendByte('\t');
148 } else if (ch == 'b') {
149 m_StringBuf.AppendByte('\b');
150 } else if (ch == 'f') {
151 m_StringBuf.AppendByte('\f');
152 } else if (ch == '\\') {
153 m_StringBuf.AppendByte('\\');
154 } else if (ch == '(') {
155 m_StringBuf.AppendByte('(');
156 } else if (ch == ')') {
157 m_StringBuf.AppendByte(')');
158 } else if (ch == '\r') {
161 } else if (ch == '\n') {
163 m_StringBuf.AppendByte(ch);
// Further octal digits are folded in as m_EscCode * 8 + digit value; the
// accumulated m_EscCode is appended to m_StringBuf.
168 if (ch >= '0' && ch <= '7') {
169 m_EscCode = m_EscCode * 8 + ch - '0';
172 m_StringBuf.AppendByte(m_EscCode);
178 if (ch >= '0' && ch <= '7') {
179 m_EscCode = m_EscCode * 8 + ch - '0';
180 m_StringBuf.AppendByte(m_EscCode);
183 m_StringBuf.AppendByte(m_EscCode);
// Inline-image matcher that accumulates into m_StringBuf; the visible lines
// assign states 0 to 3 and remember the delimiter byte in m_InlineWhiteChar.
205 switch (m_InlineImageState) {
207 if (type == 'W' || type == 'D') {
208 m_InlineImageState = 1;
209 m_InlineWhiteChar = ch;
211 m_StringBuf.AppendByte(ch);
215 m_StringBuf.AppendByte(m_InlineWhiteChar);
217 m_InlineImageState = 2;
219 m_InlineImageState = 0;
225 m_InlineImageState = 3;
227 m_StringBuf.AppendByte('I');
228 m_InlineImageState = 0;
// Second inline-image matcher, accumulating into m_ImageSrcBuf. On the visible
// reset paths the held-back bytes (m_InlineWhiteChar, E, I) are appended to the
// buffer before the state returns to 0.
238 switch (m_InlineImageState) {
241 m_InlineImageState = 1;
242 m_InlineWhiteChar = ch;
244 m_ImageSrcBuf.AppendByte(ch);
249 m_InlineImageState = 2;
251 m_ImageSrcBuf.AppendByte(m_InlineWhiteChar);
252 m_InlineImageState = 0;
258 m_InlineImageState = 3;
260 m_ImageSrcBuf.AppendByte(m_InlineWhiteChar);
261 m_ImageSrcBuf.AppendByte('E');
262 m_InlineImageState = 0;
270 m_ImageSrcBuf.AppendByte(m_InlineWhiteChar);
271 m_ImageSrcBuf.AppendByte('E');
272 m_ImageSrcBuf.AppendByte('I');
273 m_InlineImageState = 0;
// Here m_InlineImageState is used as a write index into the m_ImageSrcBuf
// storage, bounded by GetSize().
280 if (m_InlineImageState < m_ImageSrcBuf.GetSize()) {
281 m_ImageSrcBuf.GetBuffer()[m_InlineImageState ++] = ch;
// Switches on m_WordState; the case bodies are not visible in this excerpt.
// Presumably flushes a pending token once input is exhausted -- TODO confirm.
291 void CPDF_StreamContentParser::Finish()
293 switch (m_WordState) {
// Pushes pObject onto m_pObjectStack (m_ObjectSize is post-incremented).
// The result of SetToCurObj is recorded in the m_pObjectState slot with the
// same index; any condition around that statement is not visible here.
325 void CPDF_StreamContentParser::AddContainer(CPDF_Object* pObject)
328 m_pObjectState[m_ObjectSize] = SetToCurObj(pObject);
// Asserts that the stack index is still below _FPDF_MAX_OBJECT_STACK_SIZE_.
330 FXSYS_assert(m_ObjectSize < _FPDF_MAX_OBJECT_STACK_SIZE_);
331 m_pObjectStack[m_ObjectSize++] = pObject;
// Attaches pObject to the container on top of m_pObjectStack, or passes it to
// AddObjectParam when the stack is empty. The value returned at the end is
// bInArrayOrDict, which starts TRUE and is cleared on one visible path.
// NOTE(review): the return for the empty-stack path is not visible here.
333 FX_BOOL CPDF_StreamContentParser::SetToCurObj(CPDF_Object* pObject)
335 if (m_ObjectSize == 0) {
336 AddObjectParam(pObject);
339 FX_BOOL bInArrayOrDict = TRUE;
340 CPDF_Object* pCurObj = m_pObjectStack[m_ObjectSize - 1];
// Top of stack is an array: append the object.
341 if (pCurObj->GetType() == PDFOBJ_ARRAY) {
342 ((CPDF_Array*)pCurObj)->Add(pObject, m_pDocument);
// Otherwise the top is cast to a dictionary: the object is stored under
// m_pDictName when m_bDictName is not set and the stored name is non-empty.
344 if (!m_bDictName && m_pDictName[0]) {
345 ((CPDF_Dictionary*)pCurObj)->SetAt((FX_LPCSTR)m_pDictName, pObject, m_pDocument);
347 bInArrayOrDict = FALSE;
351 return bInArrayOrDict;
// Allocates a new CPDF_Array and hands it to AddContainer.
// The visible condition inspects the types of the bottom and top stack entries;
// what is done when it holds is not visible in this excerpt.
// NOTE(review): the condition indexes m_pObjectStack[0] and [m_ObjectSize - 1];
// any guard on m_ObjectSize being non-zero is not visible -- confirm.
353 void CPDF_StreamContentParser::StartArray()
356 if (m_pObjectStack[0]->GetType() != PDFOBJ_DICTIONARY && m_pObjectStack[m_ObjectSize - 1]->GetType() == PDFOBJ_ARRAY) {
359 CPDF_Array* pArray = FX_NEW CPDF_Array;
360 AddContainer(pArray);
// Closes the array on top of the object stack.
// Visible checks: an empty stack, and a top entry whose type is not
// PDFOBJ_ARRAY; the bodies of both checks are not visible here.
362 void CPDF_StreamContentParser::EndArray()
364 if (m_ObjectSize == 0) {
367 CPDF_Object* pCurObj = m_pObjectStack[m_ObjectSize - 1];
368 if (pCurObj->GetType() != PDFOBJ_ARRAY) {
// Presumably m_ObjectSize was decremented on a line not visible here; when the
// stack is now empty the finished array is passed to AddObjectParam.
372 if (m_ObjectSize == 0) {
373 AddObjectParam(pCurObj);
// The m_pObjectState slot for this index is tested and then reset to FALSE.
375 if (!m_pObjectState[m_ObjectSize]) {
379 m_pObjectState[m_ObjectSize] = FALSE;
// Allocates a new CPDF_Dictionary; the rest of the body is not visible in this
// excerpt (presumably it is passed to AddContainer -- TODO confirm).
381 void CPDF_StreamContentParser::StartDict()
383 CPDF_Dictionary* pDict = FX_NEW CPDF_Dictionary;
// Closes the dictionary on top of the object stack.
// Visible checks: an empty stack, and a top entry whose type is not
// PDFOBJ_DICTIONARY; the bodies of both checks are not visible here.
387 void CPDF_StreamContentParser::EndDict()
389 if (m_ObjectSize == 0) {
392 CPDF_Object* pCurObj = m_pObjectStack[m_ObjectSize - 1];
393 if (pCurObj->GetType() != PDFOBJ_DICTIONARY) {
// Presumably m_ObjectSize was decremented on a line not visible here; when the
// stack is now empty the finished dictionary is passed to AddObjectParam.
397 if (m_ObjectSize == 0) {
398 AddObjectParam(pCurObj);
// The m_pObjectState slot for this index is tested and then reset to FALSE.
400 if (!m_pObjectState[m_ObjectSize]) {
404 m_pObjectState[m_ObjectSize] = FALSE;
// Consumes the name token held in m_pWordBuf / m_WordSize.
// With an empty object stack the name goes to AddNameParam; with an array on
// top it is appended via AddName; otherwise it is used either as a dictionary
// key (copied to m_pDictName) or as a name value (SetAtName).
// NOTE(review): the branch structure between these statements is not visible.
406 void CPDF_StreamContentParser::EndName()
408 if (m_ObjectSize == 0) {
409 AddNameParam((FX_LPCSTR)m_pWordBuf, m_WordSize);
412 CPDF_Object* pCurObj = m_pObjectStack[m_ObjectSize - 1];
413 if (pCurObj->GetType() == PDFOBJ_ARRAY) {
414 ((CPDF_Array*)pCurObj)->AddName(CFX_ByteString(m_pWordBuf, m_WordSize));
// Copies m_WordSize bytes and writes a terminator at index m_WordSize.
// NOTE(review): the capacity of m_pDictName is not visible here; confirm it is
// at least m_WordSize + 1 for the largest word the tokenizer can produce.
417 FXSYS_memcpy32(m_pDictName, m_pWordBuf, m_WordSize);
418 m_pDictName[m_WordSize] = 0;
420 if (m_pDictName[0] != 0) {
421 ((CPDF_Dictionary*)pCurObj)->SetAtName((FX_LPCSTR)m_pDictName, CFX_ByteString(m_pWordBuf, m_WordSize));
// Flips the flag that SetToCurObj also tests before storing a value.
424 m_bDictName = !m_bDictName;
// Consumes the number token held in m_pWordBuf / m_WordSize.
// With an empty object stack it is passed to AddNumberParam as text; otherwise
// a CPDF_Number is created and offered to the current container.
427 void CPDF_StreamContentParser::EndNumber()
429 if (m_ObjectSize == 0) {
430 AddNumberParam((FX_LPCSTR)m_pWordBuf, m_WordSize);
433 CPDF_Number *pObj = FX_NEW CPDF_Number(CFX_ByteStringC(m_pWordBuf, m_WordSize));
// Handling of a FALSE result from SetToCurObj is not visible in this excerpt.
434 if (!SetToCurObj(pObj)) {
438 extern CFX_ByteString _FPDF_ByteStringFromHex(CFX_BinaryBuf& src_buf);
// Builds a CPDF_String from m_StringBuf via _FPDF_ByteStringFromHex (second
// constructor argument TRUE) and offers it to the current container.
439 void CPDF_StreamContentParser::EndHexString()
441 CPDF_String *pObj = FX_NEW CPDF_String(_FPDF_ByteStringFromHex(m_StringBuf), TRUE);
// Handling of a FALSE result from SetToCurObj is not visible in this excerpt.
442 if (!SetToCurObj(pObj)) {
// Builds a CPDF_String from the bytes collected in m_StringBuf and offers it
// to the current container.
446 void CPDF_StreamContentParser::EndString()
448 CPDF_String *pObj = FX_NEW CPDF_String(m_StringBuf.GetByteString());
// Handling of a FALSE result from SetToCurObj is not visible in this excerpt.
449 if (!SetToCurObj(pObj)) {
// Resets m_InlineImageState to 0; other statements of the body are not visible
// in this excerpt.
453 void CPDF_StreamContentParser::Handle_BeginImage(void)
456 m_InlineImageState = 0;
459 void _PDF_ReplaceAbbr(CPDF_Object* pObj);
// Finishes the inline image dictionary whose raw bytes are in m_StringBuf.
// The bytes are compared with m_LastImageDict; when they differ the text is
// re-parsed through InputData and becomes the new m_pLastImageDict, otherwise
// m_bSameLastDict is set. A clone of the dictionary is then prepared and
// m_ImageSrcBuf is sized for the image data.
// NOTE(review): braces and several statements are not visible in this excerpt,
// so the exact branch nesting below is not established here.
460 void CPDF_StreamContentParser::EndImageDict()
462 if (m_StringBuf.GetSize() != m_LastImageDict.GetSize() ||
463 FXSYS_memcmp32(m_StringBuf.GetBuffer(), m_LastImageDict.GetBuffer(), m_StringBuf.GetSize())) {
466 InputData(m_StringBuf.GetBuffer(), m_StringBuf.GetSize());
468 m_bSameLastDict = FALSE;
// Drop the previous dictionary when this parser is flagged to release it.
469 if (m_pLastImageDict && m_bReleaseLastDict) {
470 m_pLastImageDict->Release();
471 m_pLastImageDict = NULL;
474 m_InlineImageState = 0;
// Pop the freshly parsed dictionary from the object stack; the release flag is
// the negation of the state slot recorded for it.
477 m_pLastImageDict = (CPDF_Dictionary*)m_pObjectStack[--m_ObjectSize];
478 m_bReleaseLastDict = !m_pObjectState[m_ObjectSize];
479 m_pObjectState[m_ObjectSize] = FALSE;
480 _PDF_ReplaceAbbr(m_pLastImageDict);
481 m_LastImageDict.TakeOver(m_StringBuf);
// ColorSpace given as a name other than DeviceRGB, DeviceGray or DeviceCMYK is
// looked up through FindResourceObj; an object without an object number is
// cloned; the result is stored back under ColorSpace.
// NOTE(review): pCSObj is dereferenced with no null check visible here, both
// after GetElementValue and after FindResourceObj -- confirm.
482 if (m_pLastImageDict->KeyExist(FX_BSTRC("ColorSpace"))) {
483 CPDF_Object* pCSObj = m_pLastImageDict->GetElementValue(FX_BSTRC("ColorSpace"));
484 if (pCSObj->GetType() == PDFOBJ_NAME) {
485 CFX_ByteString name = pCSObj->GetString();
486 if (name != FX_BSTRC("DeviceRGB") && name != FX_BSTRC("DeviceGray") && name != FX_BSTRC("DeviceCMYK")) {
487 pCSObj = FindResourceObj(FX_BSTRC("ColorSpace"), name);
489 if (!pCSObj->GetObjNum()) {
490 pCSObj = pCSObj->Clone();
492 m_pLastImageDict->SetAt(FX_BSTRC("ColorSpace"), pCSObj, m_pDocument);
498 m_bSameLastDict = TRUE;
500 m_ImageSrcBuf.Clear();
// Replace the previous clone with a clone of the current dictionary.
501 if (m_pLastCloneImageDict)
502 m_pLastCloneImageDict->Release();
504 m_pLastCloneImageDict = (CPDF_Dictionary*)m_pLastImageDict->Clone();
505 if (m_pLastCloneImageDict->KeyExist(FX_BSTRC("Filter"))) {
507 m_InlineImageState = 0;
// Size computation for the image data from Width, Height, BitsPerComponent and
// the component count of the color space.
// NOTE(review): the products below are plain int arithmetic on values read from
// the dictionary; no overflow or sign check is visible here -- confirm.
509 int width = m_pLastCloneImageDict->GetInteger(FX_BSTRC("Width"));
510 int height = m_pLastCloneImageDict->GetInteger(FX_BSTRC("Height"));
512 CPDF_Object* pCSObj = m_pLastCloneImageDict->GetElementValue(FX_BSTRC("ColorSpace"));
513 if (pCSObj != NULL) {
514 int bpc = m_pLastCloneImageDict->GetInteger(FX_BSTRC("BitsPerComponent"));
516 CPDF_ColorSpace* pCS = m_pDocument->LoadColorSpace(pCSObj);
520 nComponents = pCS->CountComponents();
521 m_pDocument->GetPageData()->ReleaseColorSpace(pCSObj);
// Row pitch in bytes, rounded up to a whole byte.
523 int pitch = (width * bpc * nComponents + 7) / 8;
524 OrigSize = pitch * height;
// Alternative size: one bit per pixel, rows rounded up to whole bytes.
526 OrigSize = ((width + 7) / 8) * height;
// Appends OrigSize bytes to m_ImageSrcBuf with a NULL source pointer.
528 m_ImageSrcBuf.AppendBlock(NULL, OrigSize);
530 m_InlineImageState = 0;
533 void CPDF_StreamContentParser::EndInlineImage()
535 CFX_AffineMatrix ImageMatrix;
536 ImageMatrix.Copy(m_pCurStates->m_CTM);
537 ImageMatrix.Concat(m_mtContentToUser);
538 m_LastImageData.CopyData(m_ImageSrcBuf.GetBuffer(), m_ImageSrcBuf.GetSize());
539 CPDF_Stream* pStream = CPDF_Stream::Create(m_ImageSrcBuf.GetBuffer(), m_ImageSrcBuf.GetSize(),
540 m_pLastCloneImageDict);
541 m_ImageSrcBuf.DetachBuffer();
542 m_pLastCloneImageDict = NULL;
543 CPDF_InlineImages* pImages = FX_NEW CPDF_InlineImages;
544 pImages->m_pStream = pStream;
545 SetGraphicStates(pImages, !m_pLastCloneImageDict->KeyExist(FX_BSTRC("ColorSpace")), FALSE, FALSE);
546 pImages->AddMatrix(ImageMatrix);
547 m_pObjectList->m_ObjectList.AddTail(pImages);
// Four-byte keyword prefixes packed into a DWORD. Read least significant byte
// first, the constants spell the ASCII bytes t r u e, n u l l and f a l s.
550 #define FXDWORD_TRUE FXDWORD_FROM_LSBFIRST(0x65757274)
551 #define FXDWORD_NULL FXDWORD_FROM_LSBFIRST(0x6c6c756e)
552 #define FXDWORD_FALS FXDWORD_FROM_LSBFIRST(0x736c6166)
// Consumes the keyword token held in m_pWordBuf / m_WordSize.
// Four-byte words are compared with the true and null constants, five-byte
// words with the fals constant plus a trailing e; a match creates the object
// and offers it to the current container. On the remaining visible path the
// buffer is zero-terminated and passed to OnOperator.
// NOTE(review): the buffer is read through a cast to FX_DWORD pointer, which
// relies on m_pWordBuf being suitably aligned -- confirm.
553 void CPDF_StreamContentParser::EndKeyword()
555 CPDF_Object *pObj = NULL;
556 if (m_WordSize == 4) {
557 if (*(FX_DWORD*)m_pWordBuf == FXDWORD_TRUE) {
558 pObj = CPDF_Boolean::Create(TRUE);
// Handling of a FALSE result from SetToCurObj is not visible in this excerpt.
559 if (!SetToCurObj(pObj)) {
563 } else if (*(FX_DWORD*)m_pWordBuf == FXDWORD_NULL) {
564 pObj = CPDF_Null::Create();
565 if (!SetToCurObj(pObj)) {
570 } else if (m_WordSize == 5) {
571 if (*(FX_DWORD*)m_pWordBuf == FXDWORD_FALS && m_pWordBuf[4] == 'e') {
572 pObj = CPDF_Boolean::Create(FALSE);
573 if (!SetToCurObj(pObj)) {
// Anything else is dispatched as an operator name.
579 m_pWordBuf[m_WordSize] = 0;
580 OnOperator((char*)m_pWordBuf);
// Values stored in m_InternalStage: the parse stage and the clip-check stage
// that Continue() switches to once parsing is finished.
583 #define PAGEPARSE_STAGE_PARSE 2
584 #define PAGEPARSE_STAGE_CHECKCLIP 3
// Constructor. Only the m_pStreamFilter initialisation is visible in this
// excerpt; other member initialisation is not shown.
585 CPDF_ContentParser::CPDF_ContentParser()
589 m_pStreamFilter = NULL;
// Destructor; its body is not visible in this excerpt (presumably it delegates
// to Clear -- TODO confirm).
592 CPDF_ContentParser::~CPDF_ContentParser()
// Releases parser resources. The visible part deletes m_pStreamFilter when it
// is set; other cleanup statements are not visible in this excerpt.
596 void CPDF_ContentParser::Clear()
601 if (m_pStreamFilter) {
602 delete m_pStreamFilter;
// Prepares parsing of the content stream(s) of pPage.
// pOptions is copied into m_Options; the Contents entry of the page dictionary
// decides how many streams there are; a CPDF_StreamContentParser is created and
// primed, and the parser enters the parse stage with status ToBeContinued.
// NOTE(review): several lines are not visible here, including what the guard
// branches do and whether pOptions is checked for NULL before the copy.
607 void CPDF_ContentParser::Start(CPDF_Page* pPage, CPDF_ParseOptions* pOptions)
// Requires the Ready status and a page with both a document and a dictionary.
609 if (m_Status != Ready || pPage == NULL || pPage->m_pDocument == NULL || pPage->m_pFormDict == NULL) {
616 m_Options = *pOptions;
618 CPDF_Object* pContent = pPage->m_pFormDict->GetElementValue(FX_BSTRC("Contents"));
619 if (pContent == NULL) {
// Contents may be a single stream or an array; for an array the stream count
// is the array length.
623 if (pContent->GetType() == PDFOBJ_STREAM) {
625 } else if (pContent->GetType() == PDFOBJ_ARRAY) {
626 m_nStreams = ((CPDF_Array*)pContent)->GetCount();
631 m_Status = ToBeContinued;
632 m_InternalStage = PAGEPARSE_STAGE_PARSE;
634 m_pParser = FX_NEW CPDF_StreamContentParser;
635 m_pParser->Initialize();
636 m_pParser->PrepareParse(pPage->m_pDocument, pPage->m_pResources, NULL, NULL, pPage,
637 pPage->m_pResources, &pPage->m_BBox, &m_Options, NULL, 0);
// Resets the color state of the initial graphics state to its defaults.
638 m_pParser->m_pCurStates->m_ColorState.GetModify()->Default();
// Prepares parsing of the content stream of a form (optionally a Type3 glyph).
// The form Matrix is combined with the CTM of pGraphicStates when given; the
// BBox, when used, becomes a clip rectangle transformed by that matrix and by
// pParentMatrix. A CPDF_StreamContentParser is created with the form resources,
// its CTM is set to the form matrix, and the stream filter of the form stream
// is opened.
// NOTE(review): declarations of ClipPath and the conditions around the BBox
// handling are not visible in this excerpt.
640 void CPDF_ContentParser::Start(CPDF_Form* pForm, CPDF_AllStates* pGraphicStates, CFX_AffineMatrix* pParentMatrix,
641 CPDF_Type3Char* pType3Char, CPDF_ParseOptions* pOptions, int level)
643 m_pType3Char = pType3Char;
646 CFX_AffineMatrix form_matrix = pForm->m_pFormDict->GetMatrix(FX_BSTRC("Matrix"));
647 if (pGraphicStates) {
648 form_matrix.Concat(pGraphicStates->m_CTM);
650 CPDF_Array* pBBox = pForm->m_pFormDict->GetArray(FX_BSTRC("BBox"));
651 CFX_FloatRect form_bbox;
654 form_bbox = pBBox->GetRect();
656 ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, form_bbox.top);
657 ClipPath.Transform(&form_matrix);
659 ClipPath.Transform(pParentMatrix);
661 form_bbox.Transform(&form_matrix);
663 CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDict(FX_BSTRC("Resources"));
664 m_pParser = FX_NEW CPDF_StreamContentParser;
665 m_pParser->Initialize();
666 m_pParser->PrepareParse(pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, pParentMatrix, pForm,
667 pResources, &form_bbox, pOptions, pGraphicStates, level);
668 m_pParser->m_pCurStates->m_CTM = form_matrix;
669 if (ClipPath.NotNull()) {
670 m_pParser->m_pCurStates->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, TRUE);
// For a transparency group the general state is reset: normal blend mode,
// stroke and fill alpha of 1.0 and no soft mask.
672 if (pForm->m_Transparency & PDFTRANS_GROUP) {
673 CPDF_GeneralStateData* pData = m_pParser->m_pCurStates->m_GeneralState.GetModify();
674 pData->m_BlendType = FXDIB_BLEND_NORMAL;
675 pData->m_StrokeAlpha = 1.0f;
676 pData->m_FillAlpha = 1.0f;
677 pData->m_pSoftMask = NULL;
679 m_pStreamFilter = pForm->m_pFormStream->GetStreamFilter();
681 m_Status = ToBeContinued;
682 m_InternalStage = PAGEPARSE_STAGE_PARSE;
// Advances parsing while m_Status is ToBeContinued.
// Parse stage: opens the next stream filter when none is active, reads blocks
// of up to STREAM_PARSE_BUFSIZE bytes and feeds them to the stream parser.
// Clip-check stage: walks the parsed objects and clears clip paths that are a
// single rectangle containing the object bounds.
// NOTE(review): braces, loop headers and the bodies of several branches are not
// visible in this excerpt; comments cover the visible statements only.
685 void CPDF_ContentParser::Continue(IFX_Pause* pPause)
687 while (m_Status == ToBeContinued) {
688 if (m_InternalStage == PAGEPARSE_STAGE_PARSE) {
689 if (m_pStreamFilter == NULL) {
// All streams consumed: move on to the clip-check stage.
690 if (m_CurrentOffset == m_nStreams) {
691 m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP;
// Type3 glyph metrics are taken from the parser; m_Type3Data values are scaled
// by 1000 and rounded. The condition guarding these lines is not visible.
693 m_pType3Char->m_bColored = m_pParser->m_bColored;
694 m_pType3Char->m_Width = FXSYS_round(m_pParser->m_Type3Data[0] * 1000);
695 m_pType3Char->m_BBox.left = FXSYS_round(m_pParser->m_Type3Data[2] * 1000);
696 m_pType3Char->m_BBox.bottom = FXSYS_round(m_pParser->m_Type3Data[3] * 1000);
697 m_pType3Char->m_BBox.right = FXSYS_round(m_pParser->m_Type3Data[4] * 1000);
698 m_pType3Char->m_BBox.top = FXSYS_round(m_pParser->m_Type3Data[5] * 1000);
699 m_pType3Char->m_bPageRequired = m_pParser->m_bResourceMissing;
// Open the filter for the next stream: Contents itself when it is a stream,
// otherwise the array element at m_CurrentOffset.
705 CPDF_Object* pContent = m_pObjects->m_pFormDict->GetElementValue(FX_BSTRC("Contents"));
706 if (pContent->GetType() == PDFOBJ_STREAM) {
707 m_pStreamFilter = ((CPDF_Stream*)pContent)->GetStreamFilter();
709 CPDF_Stream* pStream = ((CPDF_Array*)pContent)->GetStream(m_CurrentOffset);
710 if (pStream == NULL) {
714 m_pStreamFilter = pStream->GetStreamFilter();
// Read one block and tokenize it.
717 FX_DWORD len = m_pStreamFilter->ReadBlock(m_pParser->m_pStreamBuf, STREAM_PARSE_BUFSIZE);
718 m_pParser->InputData(m_pParser->m_pStreamBuf, len);
// The parser requested an abort: drop the filter.
719 if (m_pParser->m_bAbort) {
720 delete m_pStreamFilter;
721 m_pStreamFilter = NULL;
// A short read is tested here; the filter is then deleted on the visible lines
// that follow (exact nesting not visible).
727 if (len < STREAM_PARSE_BUFSIZE) {
730 delete m_pStreamFilter;
731 m_pStreamFilter = NULL;
// Cooperative pause point.
733 if (pPause && pPause->NeedToPauseNow()) {
737 if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) {
738 FX_POSITION pos = m_pObjects->m_ObjectList.GetHeadPosition();
740 CPDF_PageObject* pObj = (CPDF_PageObject*)m_pObjects->m_ObjectList.GetNext(pos);
// Visible filters: null clip path, path count other than 1, any text clip,
// a non-rectangular path or a shading object. Their branch bodies are not
// visible (presumably they skip the object -- TODO confirm).
744 if (pObj->m_ClipPath.IsNull()) {
747 if (pObj->m_ClipPath.GetPathCount() != 1) {
750 if (pObj->m_ClipPath.GetTextCount()) {
753 CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
754 if (!ClipPath.IsRect() || pObj->m_Type == PDFPAGE_SHADING) {
// Clip rectangle built from path points 0 and 2; when it contains the object
// bounds the clip path is cleared.
757 CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0),
758 ClipPath.GetPointX(2), ClipPath.GetPointY(2));
759 CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, pObj->m_Top);
760 if (old_rect.Contains(obj_rect)) {
761 pObj->m_ClipPath.SetNull();
// Special case for a list holding exactly one object that is a text object;
// what is done with pText is not visible in this excerpt.
764 if (m_pObjects->m_ObjectList.GetCount() == 1) {
765 CPDF_PageObject* pObj = (CPDF_PageObject*)m_pObjects->m_ObjectList.GetAt(m_pObjects->m_ObjectList.GetHeadPosition());
766 if (pObj && pObj->m_Type == PDFPAGE_TEXT) {
767 CPDF_TextObject* pText = (CPDF_TextObject*)pObj;
// Returns a progress estimate as an int. The values returned for the Ready,
// Done and clip-check cases are not visible in this excerpt; during parsing the
// result is scaled to the range up to 90.
// NOTE(review): both divisions below have no visible guard against a zero
// divisor (m_nStreams, or a raw size of zero) -- confirm.
775 int CPDF_ContentParser::EstimateProgress()
777 if (m_Status == Ready) {
780 if (m_Status == Done) {
783 if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) {
// No active stream: progress is the fraction of streams already consumed.
786 if (m_pStreamFilter == NULL) {
787 return 90 * m_CurrentOffset / m_nStreams;
// Active stream: the raw size of the current stream is used as the size of
// every stream, and the source position of the filter is added for the
// current one.
789 int total_raw_size = m_pStreamFilter->GetStream()->GetRawSize() * m_nStreams;
790 int parsed_raw_size = m_pStreamFilter->GetStream()->GetRawSize() * m_CurrentOffset +
791 m_pStreamFilter->GetSrcPos();
792 return 90 * parsed_raw_size / total_raw_size;
// Constructor: tags the page object as PDFPAGE_INLINES. Other member
// initialisation is not visible in this excerpt.
794 CPDF_InlineImages::CPDF_InlineImages()
796 m_Type = PDFPAGE_INLINES;
// Destructor: releases m_pStream. Any guard around the call and any further
// cleanup are not visible in this excerpt.
800 CPDF_InlineImages::~CPDF_InlineImages()
803 m_pStream->Release();
// Records another placement matrix for this inline image and updates the
// bounds of the object from the unit rectangle of the matrix.
// NOTE(review): how rect and rect1 are combined, and the assignments of m_Left
// and m_Top, are not visible in this excerpt.
809 void CPDF_InlineImages::AddMatrix(CFX_AffineMatrix& matrix)
811 m_Matrices.Add(matrix);
812 CFX_FloatRect rect = matrix.GetUnitRect();
// From the second matrix on, the current bounds are captured in rect1.
813 if (m_Matrices.GetSize() > 1) {
814 CFX_FloatRect rect1(m_Left, m_Bottom, m_Right, m_Top);
818 m_Right = rect.right;
820 m_Bottom = rect.bottom;