From 657a1aa0700b437bd159007a97c8219c50c461a7 Mon Sep 17 00:00:00 2001 From: Henrique Nakashima Date: Wed, 12 Sep 2018 16:19:22 +0000 Subject: Set correct stream index when parsing is done in several steps. When parsing happens in several steps (in pages with > 100 page objects), the position is reset to 0 and the start pointer is advanced. This breaks the calculation of which stream an object belongs to. Passing in the base pointer separately from the start offset allows the correct position to be calculated and the correct stream to be identified. Change-Id: Ic0d5f59f437609158aa97b3c8a18dbd48cd3b0d4 Reviewed-on: https://pdfium-review.googlesource.com/42270 Commit-Queue: Henrique Nakashima Reviewed-by: Lei Zhang --- core/fpdfapi/page/cpdf_contentparser.cpp | 3 +-- core/fpdfapi/page/cpdf_streamcontentparser.cpp | 23 +++++++++++++++++------ core/fpdfapi/page/cpdf_streamcontentparser.h | 7 +++++++ 3 files changed, 25 insertions(+), 8 deletions(-) (limited to 'core') diff --git a/core/fpdfapi/page/cpdf_contentparser.cpp b/core/fpdfapi/page/cpdf_contentparser.cpp index f93cf47eb8..2cbac8f447 100644 --- a/core/fpdfapi/page/cpdf_contentparser.cpp +++ b/core/fpdfapi/page/cpdf_contentparser.cpp @@ -208,8 +208,7 @@ CPDF_ContentParser::Stage CPDF_ContentParser::Parse() { if (m_StreamSegmentOffsets.empty()) m_StreamSegmentOffsets.push_back(0); - m_CurrentOffset += m_pParser->Parse(m_pData.Get() + m_CurrentOffset, - m_Size - m_CurrentOffset, + m_CurrentOffset += m_pParser->Parse(m_pData.Get(), m_Size, m_CurrentOffset, PARSE_STEP_LIMIT, m_StreamSegmentOffsets); return Stage::kParse; } diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp index d0be6b3193..2244232ecb 100644 --- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp +++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp @@ -1286,8 +1286,9 @@ float CPDF_StreamContentParser::GetVerticalTextSize(float fKerning) const { } int32_t 
CPDF_StreamContentParser::GetCurrentStreamIndex() { - auto it = std::upper_bound(m_StreamStartOffsets.begin(), - m_StreamStartOffsets.end(), m_pSyntax->GetPos()); + auto it = + std::upper_bound(m_StreamStartOffsets.begin(), m_StreamStartOffsets.end(), + m_pSyntax->GetPos() + m_StartParseOffset); return (it - m_StreamStartOffsets.begin()) - 1; } @@ -1508,19 +1509,29 @@ void CPDF_StreamContentParser::AddPathObject(int FillType, bool bStroke) { uint32_t CPDF_StreamContentParser::Parse( const uint8_t* pData, uint32_t dwSize, + uint32_t start_offset, uint32_t max_cost, const std::vector<uint32_t>& stream_start_offsets) { + ASSERT(start_offset < dwSize); + + // Parsing will be done from |pDataStart|, for at most |size_left| bytes. + const uint8_t* pDataStart = pData + start_offset; + uint32_t size_left = dwSize - start_offset; + + m_StartParseOffset = start_offset; + if (m_ParsedSet->size() > kMaxFormLevel || - pdfium::ContainsKey(*m_ParsedSet, pData)) - return dwSize; + pdfium::ContainsKey(*m_ParsedSet, pDataStart)) { + return size_left; + } m_StreamStartOffsets = stream_start_offsets; pdfium::ScopedSetInsertion<const uint8_t*> scopedInsert(m_ParsedSet.Get(), - pData); + pDataStart); uint32_t init_obj_count = m_pObjectHolder->GetPageObjectList()->size(); - CPDF_StreamParser syntax(pdfium::make_span(pData, dwSize), + CPDF_StreamParser syntax(pdfium::make_span(pDataStart, size_left), m_pDocument->GetByteStringPool()); CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax); while (1) { diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.h b/core/fpdfapi/page/cpdf_streamcontentparser.h index aac66e883b..a129894dba 100644 --- a/core/fpdfapi/page/cpdf_streamcontentparser.h +++ b/core/fpdfapi/page/cpdf_streamcontentparser.h @@ -46,6 +46,7 @@ class CPDF_StreamContentParser { uint32_t Parse(const uint8_t* pData, uint32_t dwSize, + uint32_t start_offset, uint32_t max_cost, const std::vector<uint32_t>& stream_start_offsets); CPDF_PageObjectHolder* GetPageObjectHolder() const { @@ -237,7 +238,13 @@ class 
CPDF_StreamContentParser { std::vector<std::unique_ptr<CPDF_AllStates>> m_StateStack; float m_Type3Data[6]; ContentParam m_ParamBuf[kParamBufSize]; + + // The merged stream offsets at which a content stream ends and another + // begins. std::vector<uint32_t> m_StreamStartOffsets; + + // The merged stream offset at which the last |m_pSyntax| started parsing. + uint32_t m_StartParseOffset = 0; }; #endif // CORE_FPDFAPI_PAGE_CPDF_STREAMCONTENTPARSER_H_ -- cgit v1.2.3