summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- core/fpdfapi/parser/cpdf_data_avail.cpp 96
-rw-r--r-- core/fpdfapi/parser/cpdf_data_avail.h 3
-rw-r--r-- core/fpdfapi/parser/cpdf_parser.cpp 2
-rw-r--r-- fpdfsdk/fpdf_dataavail_embeddertest.cpp 53
4 files changed, 110 insertions, 44 deletions
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp
index 3fe4b03da6..c1d17a98ed 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.cpp
+++ b/core/fpdfapi/parser/cpdf_data_avail.cpp
@@ -251,7 +251,6 @@ bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) {
case PDF_DATAAVAIL_HEADER:
return CheckHeader(pHints);
case PDF_DATAAVAIL_FIRSTPAGE:
- case PDF_DATAAVAIL_FIRSTPAGE_PREPARE:
return CheckFirstPage(pHints);
case PDF_DATAAVAIL_HINTTABLE:
return CheckHintTables(pHints);
@@ -594,7 +593,6 @@ bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) {
return false;
}
- bool bNeedDownLoad = false;
uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
dwEnd += 512;
if ((FX_FILESIZE)dwEnd > m_dwFileLen)
@@ -604,28 +602,6 @@ bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) {
int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
pHints->AddSegment(iStartPos, iSize);
- bNeedDownLoad = true;
- }
-
- m_dwLastXRefOffset = m_pLinearized->GetLastXRefOffset();
- FX_FILESIZE dwFileLen = m_pLinearized->GetFileSize();
- if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset,
- (uint32_t)(dwFileLen - m_dwLastXRefOffset))) {
- if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) {
- uint32_t dwSize = (uint32_t)(dwFileLen - m_dwLastXRefOffset);
- FX_FILESIZE offset = m_dwLastXRefOffset;
- if (dwSize < 512 && dwFileLen > 512) {
- dwSize = 512;
- offset = dwFileLen - 512;
- }
- pHints->AddSegment(offset, dwSize);
- }
- } else {
- m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
- }
-
- if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) {
- m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
return false;
}
@@ -649,7 +625,8 @@ bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset,
size += 512;
if (!m_pFileAvail->IsDataAvail(offset, size)) {
- pHints->AddSegment(offset, size);
+ if (pHints)
+ pHints->AddSegment(offset, size);
return false;
}
return true;
@@ -1372,16 +1349,20 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData(
DownloadHints* pHints) {
if (m_bLinearedDataOK)
return DataAvailable;
+ ASSERT(m_pLinearized);
+ if (!m_pLinearized->GetLastXRefOffset())
+ return DataError;
if (!m_bMainXRefLoadTried) {
FX_SAFE_UINT32 data_size = m_dwFileLen;
- data_size -= m_dwLastXRefOffset;
+ data_size -= m_pLinearized->GetLastXRefOffset();
if (!data_size.IsValid())
return DataError;
- if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset,
+ if (!m_pFileAvail->IsDataAvail(m_pLinearized->GetLastXRefOffset(),
data_size.ValueOrDie())) {
- pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie());
+ pHints->AddSegment(m_pLinearized->GetLastXRefOffset(),
+ data_size.ValueOrDie());
return DataNotAvailable;
}
@@ -1440,11 +1421,10 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage(
return DataNotAvailable;
m_bAnnotsLoad = true;
}
-
- DocAvailStatus nRet = CheckLinearizedData(pHints);
- if (nRet == DataAvailable)
- m_bPageLoadedOK = false;
- return nRet;
+ const bool is_page_valid = ValidatePage(dwPage);
+ (void)is_page_valid;
+ ASSERT(is_page_valid);
+ return DataAvailable;
}
bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) {
@@ -1543,6 +1523,7 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
if (!m_pPageDict) {
ResetFirstCheck(dwPage);
+ // This is XFA page.
return DataAvailable;
}
@@ -1587,6 +1568,9 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
ResetFirstCheck(dwPage);
m_pagesLoadState.insert(dwPage);
+ const bool is_page_valid = ValidatePage(dwPage);
+ (void)is_page_valid;
+ ASSERT(is_page_valid);
return DataAvailable;
}
@@ -1650,6 +1634,9 @@ CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument));
}
+ const bool is_page_valid = ValidatePage(index);
+ (void)is_page_valid;
+ ASSERT(is_page_valid);
return m_pDocument->GetPage(index);
}
@@ -1657,6 +1644,13 @@ CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
DownloadHints* pHints) {
if (!m_pDocument)
return FormAvailable;
+ if (m_pLinearized) {
+ DocAvailStatus nDocStatus = CheckLinearizedData(pHints);
+ if (nDocStatus == DataError)
+ return FormError;
+ if (nDocStatus == DataNotAvailable)
+ return FormNotAvailable;
+ }
if (!m_bLinearizedFormParamLoad) {
CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
@@ -1667,14 +1661,7 @@ CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
if (!pAcroForm)
return FormNotExist;
- DocAvailStatus nDocStatus = CheckLinearizedData(pHints);
- if (nDocStatus == DataError)
- return FormError;
- if (nDocStatus == DataNotAvailable)
- return FormNotAvailable;
-
- if (m_objs_array.empty())
- m_objs_array.push_back(pAcroForm->GetDict());
+ m_objs_array.push_back(pAcroForm->GetDict());
m_bLinearizedFormParamLoad = true;
}
@@ -1685,9 +1672,36 @@ CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
}
m_objs_array.clear();
+ const bool is_form_valid = ValidateForm();
+ (void)is_form_valid;
+ ASSERT(is_form_valid);
return FormAvailable;
}
+// Sanity check used by the ASSERT()s that follow a "page is available"
+// verdict. Returns false when the document has no dictionary for page
+// |dwPage|; otherwise returns whatever AreObjectsAvailable() reports for that
+// page dictionary.
+bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) {
+  FX_SAFE_INT32 page_index = pdfium::base::checked_cast<int32_t>(dwPage);
+  CPDF_Dictionary* const page_dict =
+      m_pDocument->GetPage(page_index.ValueOrDie());
+  if (page_dict) {
+    // nullptr is passed as the third argument -- presumably the
+    // DownloadHints*, so that no segments are requested from here (confirm
+    // against the declaration of AreObjectsAvailable). The last argument is
+    // a throwaway output vector.
+    std::vector<CPDF_Object*> roots = {page_dict};
+    std::vector<CPDF_Object*> unused_results;
+    return AreObjectsAvailable(roots, true, nullptr, unused_results);
+  }
+  return false;
+}
+
+// Sanity check used by the ASSERT() that follows a FormAvailable verdict.
+// Returns true when the document has no root dictionary, false when the root
+// has no "AcroForm" entry, and otherwise whatever AreObjectsAvailable()
+// reports for the AcroForm object.
+bool CPDF_DataAvail::ValidateForm() {
+  CPDF_Dictionary* const root = m_pDocument->GetRoot();
+  if (!root)
+    return true;
+  CPDF_Object* const acro_form = root->GetObjectFor("AcroForm");
+  if (acro_form) {
+    // nullptr is passed as the third argument -- presumably the
+    // DownloadHints*, so that no segments are requested from here (confirm
+    // against the declaration of AreObjectsAvailable). The last argument is
+    // a throwaway output vector.
+    std::vector<CPDF_Object*> roots = {acro_form};
+    std::vector<CPDF_Object*> unused_results;
+    return AreObjectsAvailable(roots, true, nullptr, unused_results);
+  }
+  return false;
+}
+
CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
CPDF_DataAvail::PageNode::~PageNode() {
diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h
index 250064f38a..0f10ceb6fc 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.h
+++ b/core/fpdfapi/parser/cpdf_data_avail.h
@@ -22,7 +22,6 @@ class CPDF_Parser;
enum PDF_DATAAVAIL_STATUS {
PDF_DATAAVAIL_HEADER = 0,
PDF_DATAAVAIL_FIRSTPAGE,
- PDF_DATAAVAIL_FIRSTPAGE_PREPARE,
PDF_DATAAVAIL_HINTTABLE,
PDF_DATAAVAIL_END,
PDF_DATAAVAIL_CROSSREF,
@@ -192,6 +191,8 @@ class CPDF_DataAvail final {
bool IsFirstCheck(uint32_t dwPage);
void ResetFirstCheck(uint32_t dwPage);
bool IsDataAvail(FX_FILESIZE offset, uint32_t size, DownloadHints* pHints);
+ bool ValidatePage(uint32_t dwPage);
+ bool ValidateForm();
FileAvail* const m_pFileAvail;
IFX_SeekableReadStream* const m_pFileRead;
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 2d96834964..c43614f628 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -1557,7 +1557,7 @@ CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
m_pSyntax->GetNextChar(ch);
while (PDFCharIsWhitespace(ch)) {
++dwCount;
- if (m_pSyntax->m_FileLen >=
+ if (m_pSyntax->m_FileLen <=
(FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
break;
}
diff --git a/fpdfsdk/fpdf_dataavail_embeddertest.cpp b/fpdfsdk/fpdf_dataavail_embeddertest.cpp
index 47ba54bcde..cc95c1b177 100644
--- a/fpdfsdk/fpdf_dataavail_embeddertest.cpp
+++ b/fpdfsdk/fpdf_dataavail_embeddertest.cpp
@@ -45,13 +45,22 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
return requested_segments_;
}
- void ClearRequestedSegments() { requested_segments_.clear(); }
+  // Largest `offset + size` seen by AddSegmentImpl() since the last
+  // ClearRequestedSegments() call; 0 when nothing has been requested.
+  size_t max_requested_bound() const {
+    return max_requested_bound_;
+  }
+
+  // Forgets every recorded segment request and resets the highest requested
+  // bound back to 0.
+  void ClearRequestedSegments() {
+    max_requested_bound_ = 0;
+    requested_segments_.clear();
+  }
bool is_new_data_available() const { return is_new_data_available_; }
void set_is_new_data_available(bool is_new_data_available) {
is_new_data_available_ = is_new_data_available;
}
+  // Returns the `second` member of the last entry in |available_ranges_|, or
+  // 0 when no range has been recorded. Presumably that member is the end
+  // offset of the highest available range -- confirm against the Range alias
+  // and SetDataAvailable().
+  size_t max_already_available_bound() const {
+    if (available_ranges_.empty())
+      return 0;
+    return available_ranges_.rbegin()->second;
+  }
+
private:
void SetDataAvailable(size_t start, size_t size) {
if (size == 0)
@@ -109,6 +118,7 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
+ // Records a requested segment as an (offset, size) pair and raises
+ // |max_requested_bound_| to `offset + size` when that exceeds the current
+ // value.
void AddSegmentImpl(size_t offset, size_t size) {
requested_segments_.push_back(std::make_pair(offset, size));
+ max_requested_bound_ = std::max(max_requested_bound_, offset + size);
}
bool IsDataAvailImpl(size_t offset, size_t size) {
@@ -143,6 +153,7 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
size_t file_length_;
std::vector<std::pair<size_t, size_t>> requested_segments_;
+ size_t max_requested_bound_ = 0;
bool is_new_data_available_ = true;
using Range = std::pair<size_t, size_t>;
@@ -185,3 +196,43 @@ TEST_F(FPDFDataAvailEmbeddertest, LoadUsingHintTables) {
EXPECT_TRUE(page);
UnloadPage(page);
}
+
+// Checks that, for a linearized file, making the document and its first page
+// available neither reads nor requests data reaching the end of the file,
+// which is where this test expects the main cross-reference table to live.
+TEST_F(FPDFDataAvailEmbeddertest,
+ DoNotLoadMainCrossRefForFirstPageIfLinearized) {
+ TestAsyncLoader loader("feature_linearized_loading.pdf");
+ avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
+ ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
+ document_ = FPDFAvail_GetDocument(avail_, nullptr);
+ ASSERT_TRUE(document_);
+ const int first_page_num = FPDFAvail_GetFirstPageNum(document_);
+
+ // Document availability must not have processed the main cross-reference
+ // table. The test relies on that table always being at the end of the file,
+ // so the available data must stop short of the file length.
+ EXPECT_GT(loader.file_access()->m_FileLen,
+ loader.max_already_available_bound());
+
+ // Block access to data that has not been requested yet, so that the parser
+ // is forced to issue new requests for whatever it still needs.
+ // The return value of this call is not checked; the expectation below only
+ // inspects what was requested.
+ loader.set_is_new_data_available(false);
+ FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints());
+
+ // The main cross-reference table (at the end of the file) must not have
+ // been requested either.
+ EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());
+
+ // Let new data through again so that the page can be parsed.
+ loader.set_is_new_data_available(true);
+ ASSERT_EQ(PDF_DATA_AVAIL,
+ FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()));
+
+ // Even with the page fully available, the main cross-reference table (at
+ // the end of the file) must still not have been processed.
+ EXPECT_GT(loader.file_access()->m_FileLen,
+ loader.max_already_available_bound());
+
+ // Block any further data while the page itself is loaded: loading must
+ // succeed using only what is already available.
+ loader.set_is_new_data_available(false);
+ FPDF_PAGE page = LoadPage(first_page_num);
+ EXPECT_TRUE(page);
+ UnloadPage(page);
+}