summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- core/fpdfapi/parser/cpdf_data_avail.cpp 52
-rw-r--r-- core/fpdfapi/parser/cpdf_document_unittest.cpp 44
-rw-r--r-- fpdfsdk/fpdf_dataavail_embeddertest.cpp 166
3 files changed, 231 insertions, 31 deletions
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp
index c6a434be5d..318f2cf54d 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.cpp
+++ b/core/fpdfapi/parser/cpdf_data_avail.cpp
@@ -1626,7 +1626,7 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
if (nResult != DataAvailable)
return nResult;
m_pagesLoadState.insert(dwPage);
- return DataAvailable;
+ return GetPage(dwPage) ? DataAvailable : DataError;
}
if (m_bMainXRefLoadedOK) {
@@ -1751,31 +1751,33 @@ int CPDF_DataAvail::GetPageCount() const {
CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
if (!m_pDocument || index < 0 || index >= GetPageCount())
return nullptr;
+ CPDF_Dictionary* page = m_pDocument->GetPage(index);
+ if (page)
+ return page;
+ if (!m_pLinearized || !m_pHintTables)
+ return nullptr;
- if (m_pLinearized) {
- CPDF_Dictionary* pDict = m_pLinearized->GetDict();
- CPDF_Object* pObj = pDict ? pDict->GetDirectObjectFor("P") : nullptr;
-
- int pageNum = pObj ? pObj->GetInteger() : 0;
- if (m_pHintTables && index != pageNum) {
- FX_FILESIZE szPageStartPos = 0;
- FX_FILESIZE szPageLength = 0;
- uint32_t dwObjNum = 0;
- bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
- &szPageLength, &dwObjNum);
- if (!bPagePosGot)
- return nullptr;
-
- m_syntaxParser.InitParser(m_pFileRead, (uint32_t)szPageStartPos);
- CPDF_Object* pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument);
- if (!pPageDict)
- return nullptr;
-
- if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(dwObjNum,
- pPageDict)) {
- return nullptr;
- }
- return pPageDict->GetDict();
+ CPDF_Dictionary* pDict = m_pLinearized->GetDict();
+ CPDF_Object* pObj = pDict ? pDict->GetDirectObjectFor("P") : nullptr;
+ int firstPageNum = pObj ? pObj->GetInteger() : 0;
+ if (index == firstPageNum)
+ return nullptr;
+ FX_FILESIZE szPageStartPos = 0;
+ FX_FILESIZE szPageLength = 0;
+ uint32_t dwObjNum = 0;
+ const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
+ &szPageLength, &dwObjNum);
+ if (!bPagePosGot || !dwObjNum)
+ return nullptr;
+ // Tell the document which object number holds this page.
+ m_pDocument->SetPageObjNum(index, dwObjNum);
+ // The page object may already have been parsed by the document.
+ CPDF_Object* pPageDict = m_pDocument->GetIndirectObject(dwObjNum);
+ if (!pPageDict) {
+ m_syntaxParser.InitParser(m_pFileRead, (uint32_t)szPageStartPos);
+ pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument);
+ if (pPageDict) {
+ m_pDocument->ReplaceIndirectObjectIfHigherGeneration(dwObjNum, pPageDict);
}
}
return m_pDocument->GetPage(index);
diff --git a/core/fpdfapi/parser/cpdf_document_unittest.cpp b/core/fpdfapi/parser/cpdf_document_unittest.cpp
index 799ecc694e..9336626f45 100644
--- a/core/fpdfapi/parser/cpdf_document_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_document_unittest.cpp
@@ -15,6 +15,9 @@
namespace {
+using ScopedDictionary =
+ std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>>;
+
CPDF_Dictionary* CreatePageTreeNode(CPDF_Array* kids,
CPDF_Document* pDoc,
int count) {
@@ -35,13 +38,9 @@ CPDF_Dictionary* CreateNumberedPage(size_t number) {
return page;
}
-} // namespace
-
class CPDF_TestDocumentForPages : public CPDF_Document {
public:
CPDF_TestDocumentForPages() : CPDF_Document(nullptr) {
- CPDF_ModuleMgr* module_mgr = CPDF_ModuleMgr::Get();
- module_mgr->InitPageModule();
// Set up test
CPDF_Array* zeroToTwo = new CPDF_Array();
zeroToTwo->AddReference(this, AddIndirectObject(CreateNumberedPage(0)));
@@ -80,8 +79,18 @@ class CPDF_TestDocumentForPages : public CPDF_Document {
std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>>
m_pOwnedRootDict;
};
+} // namespace
+
+class cpdf_document_test : public testing::Test {
+ public:
+ void SetUp() override {
+ CPDF_ModuleMgr* module_mgr = CPDF_ModuleMgr::Get();
+ module_mgr->InitPageModule();
+ }
+ void TearDown() override {}
+};
-TEST(cpdf_document, GetPages) {
+TEST_F(cpdf_document_test, GetPages) {
std::unique_ptr<CPDF_TestDocumentForPages> document =
pdfium::MakeUnique<CPDF_TestDocumentForPages>();
for (int i = 0; i < 7; i++) {
@@ -94,7 +103,7 @@ TEST(cpdf_document, GetPages) {
EXPECT_FALSE(page);
}
-TEST(cpdf_document, GetPagesReverseOrder) {
+TEST_F(cpdf_document_test, GetPagesReverseOrder) {
std::unique_ptr<CPDF_TestDocumentForPages> document =
pdfium::MakeUnique<CPDF_TestDocumentForPages>();
for (int i = 6; i >= 0; i--) {
@@ -106,3 +115,26 @@ TEST(cpdf_document, GetPagesReverseOrder) {
CPDF_Dictionary* page = document->GetPage(7);
EXPECT_FALSE(page);
}
+
+TEST_F(cpdf_document_test, UseCachedPageObjNumIfHaveNotPagesDict) {
+ // The page's ObjNum can be set from CPDF_DataAvail::IsPageAvail, in which
+ // case the PagesDict may not exist yet (this happens when CPDF_DataAvail
+ // uses the hint table to check page availability).
+ CPDF_Document document(pdfium::MakeUnique<CPDF_Parser>());
+ ScopedDictionary dict(new CPDF_Dictionary());
+ const int page_count = 100;
+ dict->SetIntegerFor("N", page_count);
+ document.LoadLinearizedDoc(dict.get());
+ ASSERT_EQ(page_count, document.GetPageCount());
+ CPDF_Object* page_stub = new CPDF_Dictionary();
+ const uint32_t obj_num = document.AddIndirectObject(page_stub);
+ const int test_page_num = 33;
+
+ EXPECT_FALSE(document.IsPageLoaded(test_page_num));
+ EXPECT_EQ(nullptr, document.GetPage(test_page_num));
+
+ document.SetPageObjNum(test_page_num, obj_num);
+
+ EXPECT_TRUE(document.IsPageLoaded(test_page_num));
+ EXPECT_EQ(page_stub, document.GetPage(test_page_num));
+}
diff --git a/fpdfsdk/fpdf_dataavail_embeddertest.cpp b/fpdfsdk/fpdf_dataavail_embeddertest.cpp
index 70537779fe..47ba54bcde 100644
--- a/fpdfsdk/fpdf_dataavail_embeddertest.cpp
+++ b/fpdfsdk/fpdf_dataavail_embeddertest.cpp
@@ -2,9 +2,160 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
#include "public/fpdfview.h"
#include "testing/embedder_test.h"
#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/test_support.h"
+#include "testing/utils/path_service.h"
+
+namespace {
+class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
+ public:
+ TestAsyncLoader(const std::string& file_name) {
+ std::string file_path;
+ if (!PathService::GetTestFilePath(file_name, &file_path))
+ return;
+ file_contents_ = GetFileContents(file_path.c_str(), &file_length_);
+ if (!file_contents_)
+ return;
+
+ file_access_.m_FileLen = static_cast<unsigned long>(file_length_);
+ file_access_.m_GetBlock = SGetBlock;
+ file_access_.m_Param = this;
+
+ FX_DOWNLOADHINTS::version = 1;
+ FX_DOWNLOADHINTS::AddSegment = SAddSegment;
+
+ FX_FILEAVAIL::version = 1;
+ FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
+ }
+
+ bool IsOpened() const { return !!file_contents_; }
+
+ FPDF_FILEACCESS* file_access() { return &file_access_; }
+ FX_DOWNLOADHINTS* hints() { return this; }
+ FX_FILEAVAIL* file_avail() { return this; }
+
+ const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
+ return requested_segments_;
+ }
+
+ void ClearRequestedSegments() { requested_segments_.clear(); }
+
+ bool is_new_data_available() const { return is_new_data_available_; }
+ void set_is_new_data_available(bool is_new_data_available) {
+ is_new_data_available_ = is_new_data_available;
+ }
+
+ private:
+ void SetDataAvailable(size_t start, size_t size) {
+ if (size == 0)
+ return;
+ const auto range = std::make_pair(start, start + size);
+ if (available_ranges_.empty()) {
+ available_ranges_.insert(range);
+ return;
+ }
+ auto start_it = available_ranges_.upper_bound(range);
+ if (start_it != available_ranges_.begin())
+ --start_it; // start now points to the key equal or lower than offset.
+ if (start_it->second < range.first)
+ ++start_it; // start element is entirely before current range, skip it.
+
+ auto end_it = available_ranges_.upper_bound(
+ std::make_pair(range.second, range.second));
+ if (start_it == end_it) { // No ranges to merge.
+ available_ranges_.insert(range);
+ return;
+ }
+
+ --end_it;
+
+ size_t new_start = std::min<size_t>(start_it->first, range.first);
+ size_t new_end = std::max(end_it->second, range.second);
+
+ available_ranges_.erase(start_it, ++end_it);
+ available_ranges_.insert(std::make_pair(new_start, new_end));
+ }
+
+ bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
+ if (size == 0)
+ return false;
+ const auto range = std::make_pair(start, start + size);
+ auto it = available_ranges_.upper_bound(range);
+ if (it == available_ranges_.begin())
+ return false; // No ranges includes range.start().
+
+ --it; // Now it starts equal or before range.start().
+ return it->second >= range.second;
+ }
+
+ int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
+ if (!IsDataAvailImpl(pos, size))
+ return 0;
+ const unsigned long end =
+ std::min(static_cast<unsigned long>(file_length_), pos + size);
+ if (end <= pos)
+ return 0;
+ memcpy(pBuf, file_contents_.get() + pos, end - pos);
+ SetDataAvailable(pos, end - pos);
+ return static_cast<int>(end - pos);
+ }
+
+ void AddSegmentImpl(size_t offset, size_t size) {
+ requested_segments_.push_back(std::make_pair(offset, size));
+ }
+
+ bool IsDataAvailImpl(size_t offset, size_t size) {
+ if (offset + size > file_length_)
+ return false;
+ if (is_new_data_available_) {
+ SetDataAvailable(offset, size);
+ return true;
+ }
+ return CheckDataAlreadyAvailable(offset, size);
+ }
+
+ static int SGetBlock(void* param,
+ unsigned long pos,
+ unsigned char* pBuf,
+ unsigned long size) {
+ return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
+ }
+
+ static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
+ return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
+ }
+
+ static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
+ size_t offset,
+ size_t size) {
+ return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
+ }
+
+ FPDF_FILEACCESS file_access_;
+
+ std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
+ size_t file_length_;
+ std::vector<std::pair<size_t, size_t>> requested_segments_;
+ bool is_new_data_available_ = true;
+
+ using Range = std::pair<size_t, size_t>;
+ struct range_compare {
+ bool operator()(const Range& lval, const Range& rval) const {
+ return lval.first < rval.first;
+ }
+ };
+ using RangesContainer = std::set<Range, range_compare>;
+ RangesContainer available_ranges_;
+};
+
+} // namespace
class FPDFDataAvailEmbeddertest : public EmbedderTest {};
@@ -19,3 +170,18 @@ TEST_F(FPDFDataAvailEmbeddertest, TrailerAsHexstring) {
EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf"));
EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints_));
}
+
+TEST_F(FPDFDataAvailEmbeddertest, LoadUsingHintTables) {
+ TestAsyncLoader loader("feature_linearized_loading.pdf");
+ avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
+ ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
+ document_ = FPDFAvail_GetDocument(avail_, nullptr);
+ ASSERT_TRUE(document_);
+ ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));
+
+ // Make no new data available, to prevent loading of the "Pages" node.
+ loader.set_is_new_data_available(false);
+ FPDF_PAGE page = LoadPage(1);
+ EXPECT_TRUE(page);
+ UnloadPage(page);
+}