Fixing metadata not read from linearized file. (chromium/3136)

If the info dict is not on the first page, this still won't work unless FPDFAvail_IsFormAvail or FPDFAvail_IsPageAvail is called first, as these are the methods that trigger parsing the rest of the data.

Bug: pdfium:664
Change-Id: I0b0193e415a1153dcfb8bfba0e0482da6b6ba53c
Reviewed-on: https://pdfium-review.googlesource.com/6610
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
Reviewed-by: Nicolás Peña <npm@chromium.org>
author: Henrique Nakashima <hnakashima@chromium.org> 2017-06-19 16:04:34 -0400
committer: Chromium commit bot <commit-bot@chromium.org> 2017-06-19 20:44:58 +0000
commit: b73ce7bcd72a97c780176d9f63681a5415c06422 (patch)
tree: 2e0b3b4f2996344101b9af67ce761708255b05ef /core/fpdfapi
parent: d3610056e36b9211e9ed3806a3f99a1184e9413e (diff)
download: pdfium-b73ce7bcd72a97c780176d9f63681a5415c06422.tar.xz
3 files changed, 22 insertions, 2 deletions
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 1fdd59e615..9bfb16b97e 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -374,6 +374,10 @@ void CPDF_Document::LoadDocInternal() {
   if (!m_pRootDict)
     return;
 
+  LoadDocumentInfo();
+}
+
+void CPDF_Document::LoadDocumentInfo() {
   CPDF_Object* pInfoObj = GetOrParseIndirectObject(m_pParser->GetInfoObjNum());
   if (pInfoObj)
     m_pInfoDict = pInfoObj->GetDict();
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index 493c1edb32..e6107e1040 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -87,6 +87,7 @@ class CPDF_Document : public CPDF_IndirectObjectHolder {
   void LoadDoc();
   void LoadLinearizedDoc(const CPDF_LinearizedHeader* pLinearizationParams);
   void LoadPages();
+  void LoadDocumentInfo();
 
   void CreateNewDoc();
   CPDF_Dictionary* CreateNewPage(int iPage);
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 01c3b8ccd1..b2bde8d242 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -360,6 +360,9 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
   if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
     return false;
 
+  if (m_pTrailer)
+    m_Trailers.push_back(std::move(m_pTrailer));
+
   m_pTrailer = LoadTrailerV4();
   if (!m_pTrailer)
     return false;
@@ -1084,7 +1087,18 @@ uint32_t CPDF_Parser::GetRootObjNum() {
 uint32_t CPDF_Parser::GetInfoObjNum() {
   CPDF_Reference* pRef =
       ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Info") : nullptr);
-  return pRef ? pRef->GetRefObjNum() : 0;
+  if (pRef)
+    return pRef->GetRefObjNum();
+
+  // Search trailers array from latest to earliest revision, as we want the
+  // most recent Info object number.
+  // See PDF 1.7 spec, section 3.4.5 - Incremental Updates.
+  for (auto it = m_Trailers.rbegin(); it != m_Trailers.rend(); ++it) {
+    pRef = ToReference(it->get()->GetObjectFor("Info"));
+    if (pRef)
+      return pRef->GetRefObjNum();
+  }
+  return 0;
 }
 
 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject(
@@ -1545,7 +1559,8 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
   uint32_t dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
   m_pSyntax->m_MetadataObjnum = 0;
-  m_pTrailer.reset();
+  if (m_pTrailer)
+    m_Trailers.push_back(std::move(m_pTrailer));
   m_pSyntax->SetPos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
 
   uint8_t ch = 0;
author	Henrique Nakashima <hnakashima@chromium.org>	2017-06-19 16:04:34 -0400
committer	Chromium commit bot <commit-bot@chromium.org>	2017-06-19 20:44:58 +0000
commit	b73ce7bcd72a97c780176d9f63681a5415c06422 (patch)
tree	2e0b3b4f2996344101b9af67ce761708255b05ef /core/fpdfapi
parent	d3610056e36b9211e9ed3806a3f99a1184e9413e (diff)
download	pdfium-b73ce7bcd72a97c780176d9f63681a5415c06422.tar.xz