From 657a1aa0700b437bd159007a97c8219c50c461a7 Mon Sep 17 00:00:00 2001
From: Henrique Nakashima <hnakashima@chromium.org>
Date: Wed, 12 Sep 2018 16:19:22 +0000
Subject: Set correct stream index when parsing is done in several steps.

When parsing happens in several steps (in pages with > 100 page
objects), the position is reset to 0 and the start pointer is
advanced. This breaks the calculation of which stream an object
belongs to.

Passing in the base pointer separately from the start offset allows
the correct position to be calculated and the correct stream to be
identified.

Change-Id: Ic0d5f59f437609158aa97b3c8a18dbd48cd3b0d4
Reviewed-on: https://pdfium-review.googlesource.com/42270
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
---
 core/fpdfapi/page/cpdf_contentparser.cpp       |  3 +--
 core/fpdfapi/page/cpdf_streamcontentparser.cpp | 23 +++++++++++++++++------
 core/fpdfapi/page/cpdf_streamcontentparser.h   |  7 +++++++
 3 files changed, 25 insertions(+), 8 deletions(-)

(limited to 'core')
diff --git a/core/fpdfapi/page/cpdf_contentparser.cpp b/core/fpdfapi/page/cpdf_contentparser.cpp
index f93cf47eb8..2cbac8f447 100644
--- a/core/fpdfapi/page/cpdf_contentparser.cpp
+++ b/core/fpdfapi/page/cpdf_contentparser.cpp
@@ -208,8 +208,7 @@ CPDF_ContentParser::Stage CPDF_ContentParser::Parse() {
   if (m_StreamSegmentOffsets.empty())
     m_StreamSegmentOffsets.push_back(0);
 
-  m_CurrentOffset += m_pParser->Parse(m_pData.Get() + m_CurrentOffset,
-                                      m_Size - m_CurrentOffset,
+  m_CurrentOffset += m_pParser->Parse(m_pData.Get(), m_Size, m_CurrentOffset,
                                       PARSE_STEP_LIMIT, m_StreamSegmentOffsets);
   return Stage::kParse;
 }
diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
index d0be6b3193..2244232ecb 100644
--- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp
+++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
@@ -1286,8 +1286,9 @@ float CPDF_StreamContentParser::GetVerticalTextSize(float fKerning) const {
 }
 
 int32_t CPDF_StreamContentParser::GetCurrentStreamIndex() {
-  auto it = std::upper_bound(m_StreamStartOffsets.begin(),
-                             m_StreamStartOffsets.end(), m_pSyntax->GetPos());
+  auto it =
+      std::upper_bound(m_StreamStartOffsets.begin(), m_StreamStartOffsets.end(),
+                       m_pSyntax->GetPos() + m_StartParseOffset);
   return (it - m_StreamStartOffsets.begin()) - 1;
 }
 
@@ -1508,19 +1509,29 @@ void CPDF_StreamContentParser::AddPathObject(int FillType, bool bStroke) {
 uint32_t CPDF_StreamContentParser::Parse(
     const uint8_t* pData,
     uint32_t dwSize,
+    uint32_t start_offset,
     uint32_t max_cost,
     const std::vector<uint32_t>& stream_start_offsets) {
+  ASSERT(start_offset < dwSize);
+
+  // Parsing will be done from |pDataStart|, for at most |size_left| bytes.
+  const uint8_t* pDataStart = pData + start_offset;
+  uint32_t size_left = dwSize - start_offset;
+
+  m_StartParseOffset = start_offset;
+
   if (m_ParsedSet->size() > kMaxFormLevel ||
-      pdfium::ContainsKey(*m_ParsedSet, pData))
-    return dwSize;
+      pdfium::ContainsKey(*m_ParsedSet, pDataStart)) {
+    return size_left;
+  }
 
   m_StreamStartOffsets = stream_start_offsets;
 
   pdfium::ScopedSetInsertion<const uint8_t*> scopedInsert(m_ParsedSet.Get(),
-                                                          pData);
+                                                          pDataStart);
 
   uint32_t init_obj_count = m_pObjectHolder->GetPageObjectList()->size();
-  CPDF_StreamParser syntax(pdfium::make_span(pData, dwSize),
+  CPDF_StreamParser syntax(pdfium::make_span(pDataStart, size_left),
                            m_pDocument->GetByteStringPool());
   CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax);
   while (1) {
diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.h b/core/fpdfapi/page/cpdf_streamcontentparser.h
index aac66e883b..a129894dba 100644
--- a/core/fpdfapi/page/cpdf_streamcontentparser.h
+++ b/core/fpdfapi/page/cpdf_streamcontentparser.h
@@ -46,6 +46,7 @@ class CPDF_StreamContentParser {
 
   uint32_t Parse(const uint8_t* pData,
                  uint32_t dwSize,
+                 uint32_t start_offset,
                  uint32_t max_cost,
                  const std::vector<uint32_t>& stream_start_offsets);
   CPDF_PageObjectHolder* GetPageObjectHolder() const {
@@ -237,7 +238,13 @@ class CPDF_StreamContentParser {
   std::vector<std::unique_ptr<CPDF_AllStates>> m_StateStack;
   float m_Type3Data[6];
   ContentParam m_ParamBuf[kParamBufSize];
+
+  // The merged stream offsets at which a content stream ends and another
+  // begins.
   std::vector<uint32_t> m_StreamStartOffsets;
+
+  // The merged stream offset at which the last |m_pSyntax| started parsing.
+  uint32_t m_StartParseOffset = 0;
 };
 
 #endif  // CORE_FPDFAPI_PAGE_CPDF_STREAMCONTENTPARSER_H_
-- 
cgit v1.2.3