Rework of Fixing metadata not read from linearized file.

Move receiving "Info" dictionary from API implementation to CPDF_Document. Also added tests. Bug: pdfium:664 Change-Id: I273980750fbdd4d20711f651245780fc9ba02789 Reviewed-on: https://pdfium-review.googlesource.com/35490 Commit-Queue: Art Snake <art-snake@yandex-team.ru> Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
author: Artem Strygin <art-snake@yandex-team.ru> 2018-06-22 12:45:14 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-06-22 12:45:14 +0000
commit: a327030ca50bd9e06d4bab3ffc5610bf348be0ee (patch)
tree: fe392e1b235dd2e218f121325fc6bbfc18bdbe68
parent: c3cc2ab66d3d8f52dea8083abb6775115e17af7d (diff)
download: pdfium-a327030ca50bd9e06d4bab3ffc5610bf348be0ee.tar.xz
4 files changed, 121 insertions, 15 deletions
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index b163cafbbb..393a6f6785 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -219,17 +219,6 @@ void CPDF_Document::LoadDocInternal() {
   m_pRootDict = pRootObj->GetDict();
   if (!m_pRootDict)
     return;
-
-  LoadDocumentInfo();
-}
-
-void CPDF_Document::LoadDocumentInfo() {
-  if (!m_pParser)
-    return;
-
-  CPDF_Object* pInfoObj = GetOrParseIndirectObject(m_pParser->GetInfoObjNum());
-  if (pInfoObj)
-    m_pInfoDict = pInfoObj->GetDict();
 }
 
 void CPDF_Document::LoadDoc() {
@@ -601,6 +590,18 @@ bool CPDF_Document::InsertNewPage(int iPage, CPDF_Dictionary* pPageDict) {
   return true;
 }
 
+CPDF_Dictionary* CPDF_Document::GetInfo() {
+  if (m_pInfoDict)
+    return m_pInfoDict.Get();
+
+  if (!m_pParser || !m_pParser->GetInfoObjNum())
+    return nullptr;
+
+  CPDF_Reference ref(this, m_pParser->GetInfoObjNum());
+  m_pInfoDict = ToDictionary(ref.GetDirect());
+  return m_pInfoDict.Get();
+}
+
 void CPDF_Document::DeletePage(int iPage) {
   CPDF_Dictionary* pPages = GetPagesDict();
   if (!pPages)
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index d196438e80..94d20504c5 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -63,8 +63,7 @@ class CPDF_Document : public CPDF_IndirectObjectHolder {
   CPDF_Parser* GetParser() const { return m_pParser.get(); }
   const CPDF_Dictionary* GetRoot() const { return m_pRootDict; }
   CPDF_Dictionary* GetRoot() { return m_pRootDict; }
-  const CPDF_Dictionary* GetInfo() const { return m_pInfoDict.Get(); }
-  CPDF_Dictionary* GetInfo() { return m_pInfoDict.Get(); }
+  CPDF_Dictionary* GetInfo();
 
   void DeletePage(int iPage);
   int GetPageCount() const;
@@ -101,7 +100,6 @@ class CPDF_Document : public CPDF_IndirectObjectHolder {
   void LoadDoc();
   void LoadLinearizedDoc(const CPDF_LinearizedHeader* pLinearizationParams);
   void LoadPages();
-  void LoadDocumentInfo();
 
   void CreateNewDoc();
   CPDF_Dictionary* CreateNewPage(int iPage);
diff --git a/fpdfsdk/fpdf_dataavail_embeddertest.cpp b/fpdfsdk/fpdf_dataavail_embeddertest.cpp
index 2084153852..ba7726613e 100644
--- a/fpdfsdk/fpdf_dataavail_embeddertest.cpp
+++ b/fpdfsdk/fpdf_dataavail_embeddertest.cpp
@@ -9,6 +9,8 @@
 #include <utility>
 #include <vector>
 
+#include "core/fxcrt/bytestring.h"
+#include "core/fxcrt/widestring.h"
 #include "public/fpdfview.h"
 #include "testing/embedder_test.h"
 #include "testing/gtest/include/gtest/gtest.h"
@@ -87,6 +89,9 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
     ClearRequestedSegments();
   }
 
+  char* file_contents() { return file_contents_.get(); }
+  size_t file_length() const { return file_length_; }
+
  private:
   void SetDataAvailable(size_t start, size_t size) {
     available_ranges_.Union(RangeSet::Range(start, start + size));
@@ -271,3 +276,105 @@ TEST_F(FPDFDataAvailEmbeddertest, LoadSecondPageIfLinearizedWithHints) {
   EXPECT_TRUE(page);
   FPDF_ClosePage(page);
 }
+
+TEST_F(FPDFDataAvailEmbeddertest, LoadInfoAfterReceivingWholeDocument) {
+  TestAsyncLoader loader("linearized.pdf");
+  loader.set_is_new_data_available(false);
+  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
+  while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
+    loader.FlushRequestedData();
+  }
+
+  document_ = FPDFAvail_GetDocument(avail_, nullptr);
+  ASSERT_TRUE(document_);
+
+  // The "info" dictionary should still be unavailable.
+  EXPECT_FALSE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
+
+  // Simulate receiving whole file.
+  loader.set_is_new_data_available(true);
+  // Load second page, to parse additional crossref sections.
+  EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));
+
+  EXPECT_TRUE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
+}
+
+TEST_F(FPDFDataAvailEmbeddertest, LoadInfoAfterReceivingFirstPage) {
+  TestAsyncLoader loader("linearized.pdf");
+  // Map "Info" to an object within the first section without breaking
+  // linearization.
+  ByteString data(loader.file_contents(), loader.file_length());
+  Optional<size_t> index = data.Find("/Info 27 0 R");
+  ASSERT_TRUE(index);
+  memcpy(loader.file_contents() + *index, "/Info 29 0 R", 12);
+
+  loader.set_is_new_data_available(false);
+  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
+  while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
+    loader.FlushRequestedData();
+  }
+
+  document_ = FPDFAvail_GetDocument(avail_, nullptr);
+  ASSERT_TRUE(document_);
+
+  // The "Info" dictionary should be available for the linearized document, if
+  // it is located in the first page section.
+  // Info was remapped to a dictionary with Type "Catalog"
+  unsigned short buffer[100] = {0};
+  EXPECT_TRUE(FPDF_GetMetaText(document_, "Type", buffer, sizeof(buffer)));
+  constexpr wchar_t kExpectedValue[] = L"Catalog";
+  EXPECT_EQ(WideString(kExpectedValue),
+            WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedValue)));
+}
+
+TEST_F(FPDFDataAvailEmbeddertest, TryLoadInvalidInfo) {
+  TestAsyncLoader loader("linearized.pdf");
+  // Map "Info" to an invalid object without breaking linearization.
+  ByteString data(loader.file_contents(), loader.file_length());
+  Optional<size_t> index = data.Find("/Info 27 0 R");
+  ASSERT_TRUE(index);
+  memcpy(loader.file_contents() + *index, "/Info 99 0 R", 12);
+
+  loader.set_is_new_data_available(false);
+  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
+  while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
+    loader.FlushRequestedData();
+  }
+
+  document_ = FPDFAvail_GetDocument(avail_, nullptr);
+  ASSERT_TRUE(document_);
+
+  // Set all data available.
+  loader.set_is_new_data_available(true);
+  // Check the first page (index 0), to load additional crossrefs.
+  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));
+
+  // Test that api is robust enough to handle the bad case.
+  EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
+}
+
+TEST_F(FPDFDataAvailEmbeddertest, TryLoadNonExistsInfo) {
+  TestAsyncLoader loader("linearized.pdf");
+  // Break the "Info" parameter without breaking linearization.
+  ByteString data(loader.file_contents(), loader.file_length());
+  Optional<size_t> index = data.Find("/Info 27 0 R");
+  ASSERT_TRUE(index);
+  memcpy(loader.file_contents() + *index, "/I_fo 27 0 R", 12);
+
+  loader.set_is_new_data_available(false);
+  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
+  while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
+    loader.FlushRequestedData();
+  }
+
+  document_ = FPDFAvail_GetDocument(avail_, nullptr);
+  ASSERT_TRUE(document_);
+
+  // Set all data available.
+  loader.set_is_new_data_available(true);
+  // Check the first page (index 0), to load additional crossrefs.
+  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));
+
+  // Test that api is robust enough to handle the bad case.
+  EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
+}
diff --git a/fpdfsdk/fpdf_doc.cpp b/fpdfsdk/fpdf_doc.cpp
index 465b11302b..7a2bbc4c2e 100644
--- a/fpdfsdk/fpdf_doc.cpp
+++ b/fpdfsdk/fpdf_doc.cpp
@@ -400,7 +400,7 @@ FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_GetMetaText(FPDF_DOCUMENT document,
   CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
   if (!pDoc)
     return 0;
-  pDoc->LoadDocumentInfo();
+
   const CPDF_Dictionary* pInfo = pDoc->GetInfo();
   if (!pInfo)
     return 0;
author	Artem Strygin <art-snake@yandex-team.ru>	2018-06-22 12:45:14 +0000
committer	Chromium commit bot <commit-bot@chromium.org>	2018-06-22 12:45:14 +0000
commit	a327030ca50bd9e06d4bab3ffc5610bf348be0ee (patch)
tree	fe392e1b235dd2e218f121325fc6bbfc18bdbe68
parent	c3cc2ab66d3d8f52dea8083abb6775115e17af7d (diff)
download	pdfium-a327030ca50bd9e06d4bab3ffc5610bf348be0ee.tar.xz