From 461b1d93f717e248ceb3c1e1bbb8285ba3258f8c Mon Sep 17 00:00:00 2001 From: art-snake Date: Mon, 31 Oct 2016 12:25:30 -0700 Subject: Fix loading page using hint tables. When a linearized document has a hint table, FPDFAvail_IsPageAvail returns true, but FPDF_LoadPage returns nullptr for pages other than the first. This happens because the document does not use the hint tables to load the page. To fix this, I force-save the page's ObjNum in the document. This restores the original fix: https://codereview.chromium.org/2437773003/ Review-Url: https://codereview.chromium.org/2444903002 --- core/fpdfapi/parser/cpdf_data_avail.cpp | 52 ++++---- core/fpdfapi/parser/cpdf_document_unittest.cpp | 44 ++++++- fpdfsdk/fpdf_dataavail_embeddertest.cpp | 166 +++++++++++++++++++++++++ 3 files changed, 231 insertions(+), 31 deletions(-) diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index c6a434be5d..318f2cf54d 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -1626,7 +1626,7 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( if (nResult != DataAvailable) return nResult; m_pagesLoadState.insert(dwPage); - return DataAvailable; + return GetPage(dwPage) ? DataAvailable : DataError; } if (m_bMainXRefLoadedOK) { @@ -1751,31 +1751,33 @@ int CPDF_DataAvail::GetPageCount() const { CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { if (!m_pDocument || index < 0 || index >= GetPageCount()) return nullptr; + CPDF_Dictionary* page = m_pDocument->GetPage(index); + if (page) + return page; + if (!m_pLinearized || !m_pHintTables) + return nullptr; - if (m_pLinearized) { - CPDF_Dictionary* pDict = m_pLinearized->GetDict(); - CPDF_Object* pObj = pDict ? pDict->GetDirectObjectFor("P") : nullptr; - - int pageNum = pObj ? 
pObj->GetInteger() : 0; - if (m_pHintTables && index != pageNum) { - FX_FILESIZE szPageStartPos = 0; - FX_FILESIZE szPageLength = 0; - uint32_t dwObjNum = 0; - bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos, - &szPageLength, &dwObjNum); - if (!bPagePosGot) - return nullptr; - - m_syntaxParser.InitParser(m_pFileRead, (uint32_t)szPageStartPos); - CPDF_Object* pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument); - if (!pPageDict) - return nullptr; - - if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(dwObjNum, - pPageDict)) { - return nullptr; - } - return pPageDict->GetDict(); + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + CPDF_Object* pObj = pDict ? pDict->GetDirectObjectFor("P") : nullptr; + int firstPageNum = pObj ? pObj->GetInteger() : 0; + if (index == firstPageNum) + return nullptr; + FX_FILESIZE szPageStartPos = 0; + FX_FILESIZE szPageLength = 0; + uint32_t dwObjNum = 0; + const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos, + &szPageLength, &dwObjNum); + if (!bPagePosGot || !dwObjNum) + return nullptr; + // We should say to the document, which object is the page. + m_pDocument->SetPageObjNum(index, dwObjNum); + // Page object already can be parsed in document. 
+ CPDF_Object* pPageDict = m_pDocument->GetIndirectObject(dwObjNum); + if (!pPageDict) { + m_syntaxParser.InitParser(m_pFileRead, (uint32_t)szPageStartPos); + pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument); + if (pPageDict) { + m_pDocument->ReplaceIndirectObjectIfHigherGeneration(dwObjNum, pPageDict); } } return m_pDocument->GetPage(index); diff --git a/core/fpdfapi/parser/cpdf_document_unittest.cpp b/core/fpdfapi/parser/cpdf_document_unittest.cpp index 799ecc694e..9336626f45 100644 --- a/core/fpdfapi/parser/cpdf_document_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_document_unittest.cpp @@ -15,6 +15,9 @@ namespace { +using ScopedDictionary = + std::unique_ptr>; + CPDF_Dictionary* CreatePageTreeNode(CPDF_Array* kids, CPDF_Document* pDoc, int count) { @@ -35,13 +38,9 @@ CPDF_Dictionary* CreateNumberedPage(size_t number) { return page; } -} // namespace - class CPDF_TestDocumentForPages : public CPDF_Document { public: CPDF_TestDocumentForPages() : CPDF_Document(nullptr) { - CPDF_ModuleMgr* module_mgr = CPDF_ModuleMgr::Get(); - module_mgr->InitPageModule(); // Set up test CPDF_Array* zeroToTwo = new CPDF_Array(); zeroToTwo->AddReference(this, AddIndirectObject(CreateNumberedPage(0))); @@ -80,8 +79,18 @@ class CPDF_TestDocumentForPages : public CPDF_Document { std::unique_ptr> m_pOwnedRootDict; }; +} // namespace + +class cpdf_document_test : public testing::Test { + public: + void SetUp() override { + CPDF_ModuleMgr* module_mgr = CPDF_ModuleMgr::Get(); + module_mgr->InitPageModule(); + } + void TearDown() override {} +}; -TEST(cpdf_document, GetPages) { +TEST_F(cpdf_document_test, GetPages) { std::unique_ptr document = pdfium::MakeUnique(); for (int i = 0; i < 7; i++) { @@ -94,7 +103,7 @@ TEST(cpdf_document, GetPages) { EXPECT_FALSE(page); } -TEST(cpdf_document, GetPagesReverseOrder) { +TEST_F(cpdf_document_test, GetPagesReverseOrder) { std::unique_ptr document = pdfium::MakeUnique(); for (int i = 6; i >= 0; i--) { @@ -106,3 +115,26 @@ TEST(cpdf_document, 
GetPagesReverseOrder) { CPDF_Dictionary* page = document->GetPage(7); EXPECT_FALSE(page); } + +TEST_F(cpdf_document_test, UseCachedPageObjNumIfHaveNotPagesDict) { + // ObjNum can be added in CPDF_DataAvail::IsPageAvail, and PagesDict + // can be not exists in this case. + // (case, when hint table is used to page check in CPDF_DataAvail). + CPDF_Document document(pdfium::MakeUnique()); + ScopedDictionary dict(new CPDF_Dictionary()); + const int page_count = 100; + dict->SetIntegerFor("N", page_count); + document.LoadLinearizedDoc(dict.get()); + ASSERT_EQ(page_count, document.GetPageCount()); + CPDF_Object* page_stub = new CPDF_Dictionary(); + const uint32_t obj_num = document.AddIndirectObject(page_stub); + const int test_page_num = 33; + + EXPECT_FALSE(document.IsPageLoaded(test_page_num)); + EXPECT_EQ(nullptr, document.GetPage(test_page_num)); + + document.SetPageObjNum(test_page_num, obj_num); + + EXPECT_TRUE(document.IsPageLoaded(test_page_num)); + EXPECT_EQ(page_stub, document.GetPage(test_page_num)); +} diff --git a/fpdfsdk/fpdf_dataavail_embeddertest.cpp b/fpdfsdk/fpdf_dataavail_embeddertest.cpp index 70537779fe..47ba54bcde 100644 --- a/fpdfsdk/fpdf_dataavail_embeddertest.cpp +++ b/fpdfsdk/fpdf_dataavail_embeddertest.cpp @@ -2,9 +2,160 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
+#include +#include +#include +#include + #include "public/fpdfview.h" #include "testing/embedder_test.h" #include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" +#include "testing/utils/path_service.h" + +namespace { +class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL { + public: + TestAsyncLoader(const std::string& file_name) { + std::string file_path; + if (!PathService::GetTestFilePath(file_name, &file_path)) + return; + file_contents_ = GetFileContents(file_path.c_str(), &file_length_); + if (!file_contents_) + return; + + file_access_.m_FileLen = static_cast(file_length_); + file_access_.m_GetBlock = SGetBlock; + file_access_.m_Param = this; + + FX_DOWNLOADHINTS::version = 1; + FX_DOWNLOADHINTS::AddSegment = SAddSegment; + + FX_FILEAVAIL::version = 1; + FX_FILEAVAIL::IsDataAvail = SIsDataAvail; + } + + bool IsOpened() const { return !!file_contents_; } + + FPDF_FILEACCESS* file_access() { return &file_access_; } + FX_DOWNLOADHINTS* hints() { return this; } + FX_FILEAVAIL* file_avail() { return this; } + + const std::vector>& requested_segments() const { + return requested_segments_; + } + + void ClearRequestedSegments() { requested_segments_.clear(); } + + bool is_new_data_available() const { return is_new_data_available_; } + void set_is_new_data_available(bool is_new_data_available) { + is_new_data_available_ = is_new_data_available; + } + + private: + void SetDataAvailable(size_t start, size_t size) { + if (size == 0) + return; + const auto range = std::make_pair(start, start + size); + if (available_ranges_.empty()) { + available_ranges_.insert(range); + return; + } + auto start_it = available_ranges_.upper_bound(range); + if (start_it != available_ranges_.begin()) + --start_it; // start now points to the key equal or lower than offset. + if (start_it->second < range.first) + ++start_it; // start element is entirely before current range, skip it. 
+ + auto end_it = available_ranges_.upper_bound( + std::make_pair(range.second, range.second)); + if (start_it == end_it) { // No ranges to merge. + available_ranges_.insert(range); + return; + } + + --end_it; + + size_t new_start = std::min(start_it->first, range.first); + size_t new_end = std::max(end_it->second, range.second); + + available_ranges_.erase(start_it, ++end_it); + available_ranges_.insert(std::make_pair(new_start, new_end)); + } + + bool CheckDataAlreadyAvailable(size_t start, size_t size) const { + if (size == 0) + return false; + const auto range = std::make_pair(start, start + size); + auto it = available_ranges_.upper_bound(range); + if (it == available_ranges_.begin()) + return false; // No ranges includes range.start(). + + --it; // Now it starts equal or before range.start(). + return it->second >= range.second; + } + + int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) { + if (!IsDataAvailImpl(pos, size)) + return 0; + const unsigned long end = + std::min(static_cast(file_length_), pos + size); + if (end <= pos) + return 0; + memcpy(pBuf, file_contents_.get() + pos, end - pos); + SetDataAvailable(pos, end - pos); + return static_cast(end - pos); + } + + void AddSegmentImpl(size_t offset, size_t size) { + requested_segments_.push_back(std::make_pair(offset, size)); + } + + bool IsDataAvailImpl(size_t offset, size_t size) { + if (offset + size > file_length_) + return false; + if (is_new_data_available_) { + SetDataAvailable(offset, size); + return true; + } + return CheckDataAlreadyAvailable(offset, size); + } + + static int SGetBlock(void* param, + unsigned long pos, + unsigned char* pBuf, + unsigned long size) { + return static_cast(param)->GetBlockImpl(pos, pBuf, size); + } + + static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) { + return static_cast(pThis)->AddSegmentImpl(offset, size); + } + + static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis, + size_t offset, + size_t size) { + return 
static_cast(pThis)->IsDataAvailImpl(offset, size); + } + + FPDF_FILEACCESS file_access_; + + std::unique_ptr file_contents_; + size_t file_length_; + std::vector> requested_segments_; + bool is_new_data_available_ = true; + + using Range = std::pair; + struct range_compare { + bool operator()(const Range& lval, const Range& rval) const { + return lval.first < rval.first; + } + }; + using RangesContainer = std::set; + RangesContainer available_ranges_; +}; + +} // namespace class FPDFDataAvailEmbeddertest : public EmbedderTest {}; @@ -19,3 +170,18 @@ TEST_F(FPDFDataAvailEmbeddertest, TrailerAsHexstring) { EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf")); EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints_)); } + +TEST_F(FPDFDataAvailEmbeddertest, LoadUsingHintTables) { + TestAsyncLoader loader("feature_linearized_loading.pdf"); + avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); + ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints())); + document_ = FPDFAvail_GetDocument(avail_, nullptr); + ASSERT_TRUE(document_); + ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints())); + + // No new data available, to prevent load "Pages" node. + loader.set_is_new_data_available(false); + FPDF_PAGE page = LoadPage(1); + EXPECT_TRUE(page); + UnloadPage(page); +} -- cgit v1.2.3