From 6eb7939300d1bc7c31afd5086c1b93d4a7628481 Mon Sep 17 00:00:00 2001 From: Henrique Nakashima Date: Tue, 12 Jun 2018 20:27:35 +0000 Subject: Fill m_ContentStream field in CPDF_PageObject. From the comment of CPDF_PageObject::GetContentStream(): """ Get what content stream the object was parsed from in its page. This number is the index of the content stream in the "Contents" array, or 0 if there is a single content stream. If the object is newly created, -1 is returned. If the object is spread among more than one content stream, this is the index of the last one. """ Bug: pdfium:1051 Change-Id: I9f7804af4f263dda0422e9542e025e3320ff7c31 Reviewed-on: https://pdfium-review.googlesource.com/34250 Commit-Queue: Henrique Nakashima Reviewed-by: dsinclair --- core/fpdfapi/page/cpdf_contentparser.cpp | 15 ++++++---- core/fpdfapi/page/cpdf_contentparser.h | 1 + core/fpdfapi/page/cpdf_formobject.cpp | 7 +++-- core/fpdfapi/page/cpdf_formobject.h | 4 ++- core/fpdfapi/page/cpdf_imageobject.cpp | 5 +++- core/fpdfapi/page/cpdf_imageobject.h | 1 + core/fpdfapi/page/cpdf_pageobject.cpp | 7 ++++- core/fpdfapi/page/cpdf_pageobject.h | 12 ++++++++ core/fpdfapi/page/cpdf_pathobject.cpp | 5 +++- core/fpdfapi/page/cpdf_pathobject.h | 1 + core/fpdfapi/page/cpdf_shadingobject.cpp | 5 ++-- core/fpdfapi/page/cpdf_shadingobject.h | 4 ++- core/fpdfapi/page/cpdf_streamcontentparser.cpp | 41 ++++++++++++++++++-------- core/fpdfapi/page/cpdf_streamcontentparser.h | 7 ++++- core/fpdfapi/page/cpdf_textobject.cpp | 5 +++- core/fpdfapi/page/cpdf_textobject.h | 1 + 16 files changed, 93 insertions(+), 28 deletions(-) (limited to 'core/fpdfapi') diff --git a/core/fpdfapi/page/cpdf_contentparser.cpp b/core/fpdfapi/page/cpdf_contentparser.cpp index 77cfade1da..2bb376e841 100644 --- a/core/fpdfapi/page/cpdf_contentparser.cpp +++ b/core/fpdfapi/page/cpdf_contentparser.cpp @@ -164,11 +164,13 @@ CPDF_ContentParser::Stage CPDF_ContentParser::PrepareContent() { FX_SAFE_UINT32 safeSize = 0; for (const auto& 
stream : m_StreamArray) { + m_StreamSegmentOffsets.push_back(safeSize.ValueOrDie()); + safeSize += stream->GetSize(); safeSize += 1; + if (!safeSize.IsValid()) + return Stage::kComplete; } - if (!safeSize.IsValid()) - return Stage::kComplete; m_Size = safeSize.ValueOrDie(); m_pData.Reset( @@ -198,9 +200,12 @@ CPDF_ContentParser::Stage CPDF_ContentParser::Parse() { if (m_CurrentOffset >= m_Size) return Stage::kCheckClip; - m_CurrentOffset += - m_pParser->Parse(m_pData.Get() + m_CurrentOffset, - m_Size - m_CurrentOffset, PARSE_STEP_LIMIT); + if (m_StreamSegmentOffsets.empty()) + m_StreamSegmentOffsets.push_back(0); + + m_CurrentOffset += m_pParser->Parse(m_pData.Get() + m_CurrentOffset, + m_Size - m_CurrentOffset, + PARSE_STEP_LIMIT, m_StreamSegmentOffsets); return Stage::kParse; } diff --git a/core/fpdfapi/page/cpdf_contentparser.h b/core/fpdfapi/page/cpdf_contentparser.h index f9b491defa..b5db5d5e13 100644 --- a/core/fpdfapi/page/cpdf_contentparser.h +++ b/core/fpdfapi/page/cpdf_contentparser.h @@ -58,6 +58,7 @@ class CPDF_ContentParser { UnownedPtr m_pType3Char; // Only used when parsing forms. 
RetainPtr m_pSingleStream; std::vector> m_StreamArray; + std::vector<uint32_t> m_StreamSegmentOffsets; MaybeOwned m_pData; uint32_t m_nStreams = 0; uint32_t m_Size = 0; diff --git a/core/fpdfapi/page/cpdf_formobject.cpp b/core/fpdfapi/page/cpdf_formobject.cpp index eca92ca0ff..22ac0d3823 100644 --- a/core/fpdfapi/page/cpdf_formobject.cpp +++ b/core/fpdfapi/page/cpdf_formobject.cpp @@ -10,9 +10,12 @@ #include "core/fpdfapi/page/cpdf_form.h" -CPDF_FormObject::CPDF_FormObject(std::unique_ptr<CPDF_Form> pForm, +CPDF_FormObject::CPDF_FormObject(int32_t content_stream, + std::unique_ptr<CPDF_Form> pForm, const CFX_Matrix& matrix) - : m_pForm(std::move(pForm)), m_FormMatrix(matrix) {} + : CPDF_PageObject(content_stream), + m_pForm(std::move(pForm)), + m_FormMatrix(matrix) {} CPDF_FormObject::~CPDF_FormObject() {} diff --git a/core/fpdfapi/page/cpdf_formobject.h b/core/fpdfapi/page/cpdf_formobject.h index c723cc0eaf..b229dce444 100644 --- a/core/fpdfapi/page/cpdf_formobject.h +++ b/core/fpdfapi/page/cpdf_formobject.h @@ -16,7 +16,9 @@ class CPDF_Form; class CPDF_FormObject : public CPDF_PageObject { public: - CPDF_FormObject(std::unique_ptr<CPDF_Form> pForm, const CFX_Matrix& matrix); + CPDF_FormObject(int32_t content_stream, + std::unique_ptr<CPDF_Form> pForm, + const CFX_Matrix& matrix); ~CPDF_FormObject() override; // CPDF_PageObject: diff --git a/core/fpdfapi/page/cpdf_imageobject.cpp b/core/fpdfapi/page/cpdf_imageobject.cpp index 3b5a740155..516a6e8cad 100644 --- a/core/fpdfapi/page/cpdf_imageobject.cpp +++ b/core/fpdfapi/page/cpdf_imageobject.cpp @@ -12,7 +12,10 @@ #include "core/fpdfapi/page/cpdf_image.h" #include "core/fpdfapi/parser/cpdf_document.h" -CPDF_ImageObject::CPDF_ImageObject() {} +CPDF_ImageObject::CPDF_ImageObject(int32_t content_stream) + : CPDF_PageObject(content_stream) {} + +CPDF_ImageObject::CPDF_ImageObject() : CPDF_ImageObject(kNoContentStream) {} CPDF_ImageObject::~CPDF_ImageObject() { MaybePurgeCache(); diff --git a/core/fpdfapi/page/cpdf_imageobject.h b/core/fpdfapi/page/cpdf_imageobject.h index 
16a506e537..d54ef8d818 100644 --- a/core/fpdfapi/page/cpdf_imageobject.h +++ b/core/fpdfapi/page/cpdf_imageobject.h @@ -16,6 +16,7 @@ class CPDF_Image; class CPDF_ImageObject : public CPDF_PageObject { public: + explicit CPDF_ImageObject(int32_t content_stream); CPDF_ImageObject(); ~CPDF_ImageObject() override; diff --git a/core/fpdfapi/page/cpdf_pageobject.cpp b/core/fpdfapi/page/cpdf_pageobject.cpp index 8bb5bf5978..604309f6e6 100644 --- a/core/fpdfapi/page/cpdf_pageobject.cpp +++ b/core/fpdfapi/page/cpdf_pageobject.cpp @@ -6,7 +6,12 @@ #include "core/fpdfapi/page/cpdf_pageobject.h" -CPDF_PageObject::CPDF_PageObject() : m_bDirty(false) {} +constexpr int32_t CPDF_PageObject::kNoContentStream; + +CPDF_PageObject::CPDF_PageObject(int32_t content_stream) + : m_bDirty(false), m_ContentStream(content_stream) {} + +CPDF_PageObject::CPDF_PageObject() : CPDF_PageObject(kNoContentStream) {} CPDF_PageObject::~CPDF_PageObject() {} diff --git a/core/fpdfapi/page/cpdf_pageobject.h b/core/fpdfapi/page/cpdf_pageobject.h index d23cd971f8..39e7629541 100644 --- a/core/fpdfapi/page/cpdf_pageobject.h +++ b/core/fpdfapi/page/cpdf_pageobject.h @@ -28,6 +28,9 @@ class CPDF_PageObject : public CPDF_GraphicStates { FORM, }; + static constexpr int32_t kNoContentStream = -1; + + explicit CPDF_PageObject(int32_t content_stream); CPDF_PageObject(); ~CPDF_PageObject() override; @@ -59,6 +62,14 @@ class CPDF_PageObject : public CPDF_GraphicStates { } FX_RECT GetBBox(const CFX_Matrix* pMatrix) const; + // Get what content stream the object was parsed from in its page. This number + // is the index of the content stream in the "Contents" array, or 0 if there + // is a single content stream. If the object is newly created, + // kNoContentStream is returned. + // If the object is spread among more than one content stream, this is the + // index of the last stream. 
+ int32_t GetContentStream() const { return m_ContentStream; } + float m_Left; float m_Right; float m_Top; @@ -73,6 +84,7 @@ class CPDF_PageObject : public CPDF_GraphicStates { void operator=(const CPDF_PageObject& src) = delete; bool m_bDirty; + int32_t m_ContentStream; }; #endif // CORE_FPDFAPI_PAGE_CPDF_PAGEOBJECT_H_ diff --git a/core/fpdfapi/page/cpdf_pathobject.cpp b/core/fpdfapi/page/cpdf_pathobject.cpp index d8c2cb8741..0882dc17f0 100644 --- a/core/fpdfapi/page/cpdf_pathobject.cpp +++ b/core/fpdfapi/page/cpdf_pathobject.cpp @@ -6,7 +6,10 @@ #include "core/fpdfapi/page/cpdf_pathobject.h" -CPDF_PathObject::CPDF_PathObject() : m_FillType(0), m_bStroke(false) {} +CPDF_PathObject::CPDF_PathObject(int32_t content_stream) + : CPDF_PageObject(content_stream), m_FillType(0), m_bStroke(false) {} + +CPDF_PathObject::CPDF_PathObject() : CPDF_PathObject(kNoContentStream) {} CPDF_PathObject::~CPDF_PathObject() {} diff --git a/core/fpdfapi/page/cpdf_pathobject.h b/core/fpdfapi/page/cpdf_pathobject.h index 58499b5a53..5155c40c3d 100644 --- a/core/fpdfapi/page/cpdf_pathobject.h +++ b/core/fpdfapi/page/cpdf_pathobject.h @@ -14,6 +14,7 @@ class CPDF_PathObject : public CPDF_PageObject { public: + explicit CPDF_PathObject(int32_t content_stream); CPDF_PathObject(); ~CPDF_PathObject() override; diff --git a/core/fpdfapi/page/cpdf_shadingobject.cpp b/core/fpdfapi/page/cpdf_shadingobject.cpp index 1b16ac4862..725e2e4591 100644 --- a/core/fpdfapi/page/cpdf_shadingobject.cpp +++ b/core/fpdfapi/page/cpdf_shadingobject.cpp @@ -9,9 +9,10 @@ #include "core/fpdfapi/page/cpdf_shadingpattern.h" #include "core/fpdfapi/parser/cpdf_document.h" -CPDF_ShadingObject::CPDF_ShadingObject(CPDF_ShadingPattern* pattern, +CPDF_ShadingObject::CPDF_ShadingObject(int32_t content_stream, + CPDF_ShadingPattern* pattern, const CFX_Matrix& matrix) - : m_pShading(pattern), m_Matrix(matrix) {} + : CPDF_PageObject(content_stream), m_pShading(pattern), m_Matrix(matrix) {} 
CPDF_ShadingObject::~CPDF_ShadingObject() {} diff --git a/core/fpdfapi/page/cpdf_shadingobject.h b/core/fpdfapi/page/cpdf_shadingobject.h index 80e062c729..69b606749d 100644 --- a/core/fpdfapi/page/cpdf_shadingobject.h +++ b/core/fpdfapi/page/cpdf_shadingobject.h @@ -15,7 +15,9 @@ class CPDF_ShadingPattern; class CPDF_ShadingObject : public CPDF_PageObject { public: - CPDF_ShadingObject(CPDF_ShadingPattern* pattern, const CFX_Matrix& matrix); + CPDF_ShadingObject(int32_t content_stream, + CPDF_ShadingPattern* pattern, + const CFX_Matrix& matrix); ~CPDF_ShadingObject() override; // CPDF_PageObject: diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp index 7562fb37a6..0cc81f1e9b 100644 --- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp +++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp @@ -785,7 +785,8 @@ void CPDF_StreamContentParser::AddForm(CPDF_Stream* pStream) { CFX_Matrix matrix = m_pCurStates->m_CTM; matrix.Concat(m_mtContentToUser); - auto pFormObj = pdfium::MakeUnique<CPDF_FormObject>(std::move(form), matrix); + auto pFormObj = pdfium::MakeUnique<CPDF_FormObject>(GetCurrentStreamIndex(), + std::move(form), matrix); if (!m_pObjectHolder->BackgroundAlphaNeeded() && pFormObj->form()->BackgroundAlphaNeeded()) { m_pObjectHolder->SetBackgroundAlphaNeeded(true); @@ -800,14 +801,16 @@ CPDF_ImageObject* CPDF_StreamContentParser::AddImage( if (!pStream) return nullptr; - auto pImageObj = pdfium::MakeUnique<CPDF_ImageObject>(); + auto pImageObj = + pdfium::MakeUnique<CPDF_ImageObject>(GetCurrentStreamIndex()); pImageObj->SetImage( pdfium::MakeRetain<CPDF_Image>(m_pDocument.Get(), std::move(pStream))); return AddImageObject(std::move(pImageObj)); } CPDF_ImageObject* CPDF_StreamContentParser::AddImage(uint32_t streamObjNum) { - auto pImageObj = pdfium::MakeUnique<CPDF_ImageObject>(); + auto pImageObj = + pdfium::MakeUnique<CPDF_ImageObject>(GetCurrentStreamIndex()); pImageObj->SetImage(m_pDocument->LoadImageFromPageData(streamObjNum)); return AddImageObject(std::move(pImageObj)); } @@ -817,7 +820,8 @@ CPDF_ImageObject* 
CPDF_StreamContentParser::AddImage( if (!pImage) return nullptr; - auto pImageObj = pdfium::MakeUnique<CPDF_ImageObject>(); + auto pImageObj = + pdfium::MakeUnique<CPDF_ImageObject>(GetCurrentStreamIndex()); pImageObj->SetImage( m_pDocument->GetPageData()->GetImage(pImage->GetStream()->GetObjNum())); @@ -1084,7 +1088,8 @@ void CPDF_StreamContentParser::Handle_ShadeFill() { CFX_Matrix matrix = m_pCurStates->m_CTM; matrix.Concat(m_mtContentToUser); - auto pObj = pdfium::MakeUnique<CPDF_ShadingObject>(pShading, matrix); + auto pObj = pdfium::MakeUnique<CPDF_ShadingObject>(GetCurrentStreamIndex(), + pShading, matrix); SetGraphicStates(pObj.get(), false, false, false); CFX_FloatRect bbox = pObj->m_ClipPath.HasRef() ? pObj->m_ClipPath.GetClipBox() : m_BBox; @@ -1219,7 +1224,7 @@ void CPDF_StreamContentParser::AddTextObject(ByteString* pStrs, pFont->IsType3Font() ? TextRenderingMode::MODE_FILL : m_pCurStates->m_TextState.GetTextMode(); { - auto pText = pdfium::MakeUnique<CPDF_TextObject>(); + auto pText = pdfium::MakeUnique<CPDF_TextObject>(GetCurrentStreamIndex()); m_pLastTextObject = pText.get(); SetGraphicStates(m_pLastTextObject.Get(), true, true, true); if (TextRenderingModeIsStrokeMode(text_mode)) { @@ -1258,6 +1263,12 @@ void CPDF_StreamContentParser::AddTextObject(ByteString* pStrs, } } +int32_t CPDF_StreamContentParser::GetCurrentStreamIndex() { + auto it = std::upper_bound(m_StreamStartOffsets.begin(), + m_StreamStartOffsets.end(), m_pSyntax->GetPos()); + return (it - m_StreamStartOffsets.begin()) - 1; +} + void CPDF_StreamContentParser::Handle_ShowText() { ByteString str = GetString(0); if (str.IsEmpty()) { @@ -1456,7 +1467,8 @@ void CPDF_StreamContentParser::AddPathObject(int FillType, bool bStroke) { CFX_Matrix matrix = m_pCurStates->m_CTM; matrix.Concat(m_mtContentToUser); if (bStroke || FillType) { - auto pPathObj = pdfium::MakeUnique<CPDF_PathObject>(); + auto pPathObj = + pdfium::MakeUnique<CPDF_PathObject>(GetCurrentStreamIndex()); pPathObj->m_bStroke = bStroke; pPathObj->m_FillType = FillType; pPathObj->m_Path = Path; @@ -1474,22 +1486,27 @@ void CPDF_StreamContentParser::AddPathObject(int 
FillType, bool bStroke) { } } -uint32_t CPDF_StreamContentParser::Parse(const uint8_t* pData, - uint32_t dwSize, - uint32_t max_cost) { +uint32_t CPDF_StreamContentParser::Parse( + const uint8_t* pData, + uint32_t dwSize, + uint32_t max_cost, + const std::vector<uint32_t>& stream_start_offsets) { if (m_ParsedSet->size() > kMaxFormLevel || pdfium::ContainsKey(*m_ParsedSet, pData)) return dwSize; + m_StreamStartOffsets = stream_start_offsets; + pdfium::ScopedSetInsertion<const uint8_t*> scopedInsert(m_ParsedSet.Get(), pData); - uint32_t InitObjCount = m_pObjectHolder->GetPageObjectList()->size(); + uint32_t init_obj_count = m_pObjectHolder->GetPageObjectList()->size(); CPDF_StreamParser syntax(pdfium::make_span(pData, dwSize), m_pDocument->GetByteStringPool()); CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax); while (1) { - uint32_t cost = m_pObjectHolder->GetPageObjectList()->size() - InitObjCount; + uint32_t cost = + m_pObjectHolder->GetPageObjectList()->size() - init_obj_count; if (max_cost && cost >= max_cost) { break; } diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.h b/core/fpdfapi/page/cpdf_streamcontentparser.h index 438be024cc..adcb2a5b47 100644 --- a/core/fpdfapi/page/cpdf_streamcontentparser.h +++ b/core/fpdfapi/page/cpdf_streamcontentparser.h @@ -43,7 +43,10 @@ class CPDF_StreamContentParser { std::set<const uint8_t*>* parsedSet); ~CPDF_StreamContentParser(); - uint32_t Parse(const uint8_t* pData, uint32_t dwSize, uint32_t max_cost); + uint32_t Parse(const uint8_t* pData, + uint32_t dwSize, + uint32_t max_cost, + const std::vector<uint32_t>& stream_start_offsets); CPDF_PageObjectHolder* GetPageObjectHolder() const { return m_pObjectHolder.Get(); } @@ -127,6 +130,7 @@ class CPDF_StreamContentParser { std::vector GetColors() const; std::vector GetNamedColors() const; + int32_t GetCurrentStreamIndex(); void Handle_CloseFillStrokePath(); void Handle_FillStrokePath(); @@ -230,6 +234,7 @@ class CPDF_StreamContentParser { std::vector> m_StateStack; float m_Type3Data[6]; ContentParam 
m_ParamBuf[kParamBufSize]; + std::vector<uint32_t> m_StreamStartOffsets; }; #endif // CORE_FPDFAPI_PAGE_CPDF_STREAMCONTENTPARSER_H_ diff --git a/core/fpdfapi/page/cpdf_textobject.cpp b/core/fpdfapi/page/cpdf_textobject.cpp index 36a4722773..e678d5fc10 100644 --- a/core/fpdfapi/page/cpdf_textobject.cpp +++ b/core/fpdfapi/page/cpdf_textobject.cpp @@ -18,7 +18,10 @@ CPDF_TextObjectItem::CPDF_TextObjectItem() : m_CharCode(0) {} CPDF_TextObjectItem::~CPDF_TextObjectItem() = default; -CPDF_TextObject::CPDF_TextObject() {} +CPDF_TextObject::CPDF_TextObject(int32_t content_stream) + : CPDF_PageObject(content_stream) {} + +CPDF_TextObject::CPDF_TextObject() : CPDF_TextObject(kNoContentStream) {} CPDF_TextObject::~CPDF_TextObject() { // Move m_CharCodes to a local variable so it will be captured in crash dumps, diff --git a/core/fpdfapi/page/cpdf_textobject.h b/core/fpdfapi/page/cpdf_textobject.h index a6fc62369c..d3b6dcc3de 100644 --- a/core/fpdfapi/page/cpdf_textobject.h +++ b/core/fpdfapi/page/cpdf_textobject.h @@ -25,6 +25,7 @@ class CPDF_TextObjectItem { class CPDF_TextObject : public CPDF_PageObject { public: + explicit CPDF_TextObject(int32_t content_stream); CPDF_TextObject(); ~CPDF_TextObject() override; -- cgit v1.2.3