From a327030ca50bd9e06d4bab3ffc5610bf348be0ee Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Fri, 22 Jun 2018 12:45:14 +0000 Subject: Rework of Fixing metadata not read from linearized file. Move receiving "Info" dictionary from API implementation to CPDF_Document. Also added test. Bug: pdfium:664 Change-Id: I273980750fbdd4d20711f651245780fc9ba02789 Reviewed-on: https://pdfium-review.googlesource.com/35490 Commit-Queue: Art Snake Reviewed-by: Henrique Nakashima --- core/fpdfapi/parser/cpdf_document.cpp | 23 +++---- core/fpdfapi/parser/cpdf_document.h | 4 +- fpdfsdk/fpdf_dataavail_embeddertest.cpp | 107 ++++++++++++++++++++++++++++++++ fpdfsdk/fpdf_doc.cpp | 2 +- 4 files changed, 121 insertions(+), 15 deletions(-) diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp index b163cafbbb..393a6f6785 100644 --- a/core/fpdfapi/parser/cpdf_document.cpp +++ b/core/fpdfapi/parser/cpdf_document.cpp @@ -219,17 +219,6 @@ void CPDF_Document::LoadDocInternal() { m_pRootDict = pRootObj->GetDict(); if (!m_pRootDict) return; - - LoadDocumentInfo(); -} - -void CPDF_Document::LoadDocumentInfo() { - if (!m_pParser) - return; - - CPDF_Object* pInfoObj = GetOrParseIndirectObject(m_pParser->GetInfoObjNum()); - if (pInfoObj) - m_pInfoDict = pInfoObj->GetDict(); } void CPDF_Document::LoadDoc() { @@ -601,6 +590,18 @@ bool CPDF_Document::InsertNewPage(int iPage, CPDF_Dictionary* pPageDict) { return true; } +CPDF_Dictionary* CPDF_Document::GetInfo() { + if (m_pInfoDict) + return m_pInfoDict.Get(); + + if (!m_pParser || !m_pParser->GetInfoObjNum()) + return nullptr; + + CPDF_Reference ref(this, m_pParser->GetInfoObjNum()); + m_pInfoDict = ToDictionary(ref.GetDirect()); + return m_pInfoDict.Get(); +} + void CPDF_Document::DeletePage(int iPage) { CPDF_Dictionary* pPages = GetPagesDict(); if (!pPages) diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h index d196438e80..94d20504c5 100644 --- 
a/core/fpdfapi/parser/cpdf_document.h +++ b/core/fpdfapi/parser/cpdf_document.h @@ -63,8 +63,7 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { CPDF_Parser* GetParser() const { return m_pParser.get(); } const CPDF_Dictionary* GetRoot() const { return m_pRootDict; } CPDF_Dictionary* GetRoot() { return m_pRootDict; } - const CPDF_Dictionary* GetInfo() const { return m_pInfoDict.Get(); } - CPDF_Dictionary* GetInfo() { return m_pInfoDict.Get(); } + CPDF_Dictionary* GetInfo(); void DeletePage(int iPage); int GetPageCount() const; @@ -101,7 +100,6 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { void LoadDoc(); void LoadLinearizedDoc(const CPDF_LinearizedHeader* pLinearizationParams); void LoadPages(); - void LoadDocumentInfo(); void CreateNewDoc(); CPDF_Dictionary* CreateNewPage(int iPage); diff --git a/fpdfsdk/fpdf_dataavail_embeddertest.cpp b/fpdfsdk/fpdf_dataavail_embeddertest.cpp index 2084153852..ba7726613e 100644 --- a/fpdfsdk/fpdf_dataavail_embeddertest.cpp +++ b/fpdfsdk/fpdf_dataavail_embeddertest.cpp @@ -9,6 +9,8 @@ #include #include +#include "core/fxcrt/bytestring.h" +#include "core/fxcrt/widestring.h" #include "public/fpdfview.h" #include "testing/embedder_test.h" #include "testing/gtest/include/gtest/gtest.h" @@ -87,6 +89,9 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL { ClearRequestedSegments(); } + char* file_contents() { return file_contents_.get(); } + size_t file_length() const { return file_length_; } + private: void SetDataAvailable(size_t start, size_t size) { available_ranges_.Union(RangeSet::Range(start, start + size)); @@ -271,3 +276,105 @@ TEST_F(FPDFDataAvailEmbeddertest, LoadSecondPageIfLinearizedWithHints) { EXPECT_TRUE(page); FPDF_ClosePage(page); } + +TEST_F(FPDFDataAvailEmbeddertest, LoadInfoAfterReceivingWholeDocument) { + TestAsyncLoader loader("linearized.pdf"); + loader.set_is_new_data_available(false); + avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); + while 
(PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) { + loader.FlushRequestedData(); + } + + document_ = FPDFAvail_GetDocument(avail_, nullptr); + ASSERT_TRUE(document_); + + // The "info" dictionary should still be unavailable. + EXPECT_FALSE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0)); + + // Simulate receiving whole file. + loader.set_is_new_data_available(true); + // Load second page, to parse additional crossref sections. + EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints())); + + EXPECT_TRUE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0)); +} + +TEST_F(FPDFDataAvailEmbeddertest, LoadInfoAfterReceivingFirstPage) { + TestAsyncLoader loader("linearized.pdf"); + // Map "Info" to an object within the first section without breaking + // linearization. + ByteString data(loader.file_contents(), loader.file_length()); + Optional index = data.Find("/Info 27 0 R"); + ASSERT_TRUE(index); + memcpy(loader.file_contents() + *index, "/Info 29 0 R", 12); + + loader.set_is_new_data_available(false); + avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); + while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) { + loader.FlushRequestedData(); + } + + document_ = FPDFAvail_GetDocument(avail_, nullptr); + ASSERT_TRUE(document_); + + // The "Info" dictionary should be available for the linearized document, if + // it is located in the first page section. + // Info was remapped to a dictionary with Type "Catalog" + unsigned short buffer[100] = {0}; + EXPECT_TRUE(FPDF_GetMetaText(document_, "Type", buffer, sizeof(buffer))); + constexpr wchar_t kExpectedValue[] = L"Catalog"; + EXPECT_EQ(WideString(kExpectedValue), + WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedValue))); +} + +TEST_F(FPDFDataAvailEmbeddertest, TryLoadInvalidInfo) { + TestAsyncLoader loader("linearized.pdf"); + // Map "Info" to an invalid object without breaking linearization. 
+ ByteString data(loader.file_contents(), loader.file_length()); + Optional index = data.Find("/Info 27 0 R"); + ASSERT_TRUE(index); + memcpy(loader.file_contents() + *index, "/Info 99 0 R", 12); + + loader.set_is_new_data_available(false); + avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); + while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) { + loader.FlushRequestedData(); + } + + document_ = FPDFAvail_GetDocument(avail_, nullptr); + ASSERT_TRUE(document_); + + // Set all data available. + loader.set_is_new_data_available(true); + // Check the first page (index 0), to load additional crossrefs. + ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints())); + + // Test that api is robust enough to handle the bad case. + EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0)); +} + +TEST_F(FPDFDataAvailEmbeddertest, TryLoadNonExistsInfo) { + TestAsyncLoader loader("linearized.pdf"); + // Break the "Info" parameter without breaking linearization. + ByteString data(loader.file_contents(), loader.file_length()); + Optional index = data.Find("/Info 27 0 R"); + ASSERT_TRUE(index); + memcpy(loader.file_contents() + *index, "/I_fo 27 0 R", 12); + + loader.set_is_new_data_available(false); + avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); + while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) { + loader.FlushRequestedData(); + } + + document_ = FPDFAvail_GetDocument(avail_, nullptr); + ASSERT_TRUE(document_); + + // Set all data available. + loader.set_is_new_data_available(true); + // Check the first page (index 0), to load additional crossrefs. + ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints())); + + // Test that api is robust enough to handle the bad case. 
+ EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0)); +} diff --git a/fpdfsdk/fpdf_doc.cpp b/fpdfsdk/fpdf_doc.cpp index 465b11302b..7a2bbc4c2e 100644 --- a/fpdfsdk/fpdf_doc.cpp +++ b/fpdfsdk/fpdf_doc.cpp @@ -400,7 +400,7 @@ FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_GetMetaText(FPDF_DOCUMENT document, CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); if (!pDoc) return 0; - pDoc->LoadDocumentInfo(); + const CPDF_Dictionary* pInfo = pDoc->GetInfo(); if (!pInfo) return 0; -- cgit v1.2.3