Set correct stream index when parsing is done in several steps.

When parsing happens in several steps (for pages with more than 100 page objects), the parser position is reset to 0 and the start pointer is advanced at each step. This breaks the calculation of which stream an object belongs to, because the stream start offsets are relative to the beginning of the merged content data, not to the advanced start pointer. Passing in the base pointer separately from the start offset allows the correct position to be calculated and the correct stream to be identified.

Change-Id: Ic0d5f59f437609158aa97b3c8a18dbd48cd3b0d4
Reviewed-on: https://pdfium-review.googlesource.com/42270
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
author: Henrique Nakashima <hnakashima@chromium.org> 2018-09-12 16:19:22 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-09-12 16:19:22 +0000
commit: 657a1aa0700b437bd159007a97c8219c50c461a7 (patch)
tree: 459aa2803a2115a853c07882de2aefde0402c7b8 /core
parent: 5c86fd4c5110a99606316721786f1ba9bf0d855a (diff)
download: pdfium-657a1aa0700b437bd159007a97c8219c50c461a7.tar.xz
3 files changed, 25 insertions, 8 deletions
diff --git a/core/fpdfapi/page/cpdf_contentparser.cpp b/core/fpdfapi/page/cpdf_contentparser.cpp
index f93cf47eb8..2cbac8f447 100644
--- a/core/fpdfapi/page/cpdf_contentparser.cpp
+++ b/core/fpdfapi/page/cpdf_contentparser.cpp
@@ -208,8 +208,7 @@ CPDF_ContentParser::Stage CPDF_ContentParser::Parse() {
   if (m_StreamSegmentOffsets.empty())
     m_StreamSegmentOffsets.push_back(0);
 
-  m_CurrentOffset += m_pParser->Parse(m_pData.Get() + m_CurrentOffset,
-                                      m_Size - m_CurrentOffset,
+  m_CurrentOffset += m_pParser->Parse(m_pData.Get(), m_Size, m_CurrentOffset,
                                       PARSE_STEP_LIMIT, m_StreamSegmentOffsets);
   return Stage::kParse;
 }
diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
index d0be6b3193..2244232ecb 100644
--- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp
+++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp
@@ -1286,8 +1286,9 @@ float CPDF_StreamContentParser::GetVerticalTextSize(float fKerning) const {
 }
 
 int32_t CPDF_StreamContentParser::GetCurrentStreamIndex() {
-  auto it = std::upper_bound(m_StreamStartOffsets.begin(),
-                             m_StreamStartOffsets.end(), m_pSyntax->GetPos());
+  auto it =
+      std::upper_bound(m_StreamStartOffsets.begin(), m_StreamStartOffsets.end(),
+                       m_pSyntax->GetPos() + m_StartParseOffset);
   return (it - m_StreamStartOffsets.begin()) - 1;
 }
 
@@ -1508,19 +1509,29 @@ void CPDF_StreamContentParser::AddPathObject(int FillType, bool bStroke) {
 uint32_t CPDF_StreamContentParser::Parse(
     const uint8_t* pData,
     uint32_t dwSize,
+    uint32_t start_offset,
     uint32_t max_cost,
     const std::vector<uint32_t>& stream_start_offsets) {
+  ASSERT(start_offset < dwSize);
+
+  // Parsing will be done from |pDataStart|, for at most |size_left| bytes.
+  const uint8_t* pDataStart = pData + start_offset;
+  uint32_t size_left = dwSize - start_offset;
+
+  m_StartParseOffset = start_offset;
+
   if (m_ParsedSet->size() > kMaxFormLevel ||
-      pdfium::ContainsKey(*m_ParsedSet, pData))
-    return dwSize;
+      pdfium::ContainsKey(*m_ParsedSet, pDataStart)) {
+    return size_left;
+  }
 
   m_StreamStartOffsets = stream_start_offsets;
 
   pdfium::ScopedSetInsertion<const uint8_t*> scopedInsert(m_ParsedSet.Get(),
-                                                          pData);
+                                                          pDataStart);
 
   uint32_t init_obj_count = m_pObjectHolder->GetPageObjectList()->size();
-  CPDF_StreamParser syntax(pdfium::make_span(pData, dwSize),
+  CPDF_StreamParser syntax(pdfium::make_span(pDataStart, size_left),
                            m_pDocument->GetByteStringPool());
   CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax);
   while (1) {
diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.h b/core/fpdfapi/page/cpdf_streamcontentparser.h
index aac66e883b..a129894dba 100644
--- a/core/fpdfapi/page/cpdf_streamcontentparser.h
+++ b/core/fpdfapi/page/cpdf_streamcontentparser.h
@@ -46,6 +46,7 @@ class CPDF_StreamContentParser {
 
   uint32_t Parse(const uint8_t* pData,
                  uint32_t dwSize,
+                 uint32_t start_offset,
                  uint32_t max_cost,
                  const std::vector<uint32_t>& stream_start_offsets);
   CPDF_PageObjectHolder* GetPageObjectHolder() const {
@@ -237,7 +238,13 @@ class CPDF_StreamContentParser {
   std::vector<std::unique_ptr<CPDF_AllStates>> m_StateStack;
   float m_Type3Data[6];
   ContentParam m_ParamBuf[kParamBufSize];
+
+  // The merged stream offsets at which a content stream ends and another
+  // begins.
   std::vector<uint32_t> m_StreamStartOffsets;
+
+  // The merged stream offset at which the last |m_pSyntax| started parsing.
+  uint32_t m_StartParseOffset = 0;
 };
 
 #endif  // CORE_FPDFAPI_PAGE_CPDF_STREAMCONTENTPARSER_H_
author	Henrique Nakashima <hnakashima@chromium.org>	2018-09-12 16:19:22 +0000
committer	Chromium commit bot <commit-bot@chromium.org>	2018-09-12 16:19:22 +0000
commit	657a1aa0700b437bd159007a97c8219c50c461a7 (patch)
tree	459aa2803a2115a853c07882de2aefde0402c7b8 /core
parent	5c86fd4c5110a99606316721786f1ba9bf0d855a (diff)
download	pdfium-657a1aa0700b437bd159007a97c8219c50c461a7.tar.xz