diff options
-rw-r--r-- | core/fpdfapi/parser/cpdf_data_avail.cpp | 96 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_data_avail.h | 3 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser.cpp | 2 | ||||
-rw-r--r-- | fpdfsdk/fpdf_dataavail_embeddertest.cpp | 53 |
4 files changed, 110 insertions, 44 deletions
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 3fe4b03da6..c1d17a98ed 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -251,7 +251,6 @@ bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) { case PDF_DATAAVAIL_HEADER: return CheckHeader(pHints); case PDF_DATAAVAIL_FIRSTPAGE: - case PDF_DATAAVAIL_FIRSTPAGE_PREPARE: return CheckFirstPage(pHints); case PDF_DATAAVAIL_HINTTABLE: return CheckHintTables(pHints); @@ -594,7 +593,6 @@ bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) { return false; } - bool bNeedDownLoad = false; uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset(); dwEnd += 512; if ((FX_FILESIZE)dwEnd > m_dwFileLen) @@ -604,28 +602,6 @@ bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) { int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { pHints->AddSegment(iStartPos, iSize); - bNeedDownLoad = true; - } - - m_dwLastXRefOffset = m_pLinearized->GetLastXRefOffset(); - FX_FILESIZE dwFileLen = m_pLinearized->GetFileSize(); - if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, - (uint32_t)(dwFileLen - m_dwLastXRefOffset))) { - if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) { - uint32_t dwSize = (uint32_t)(dwFileLen - m_dwLastXRefOffset); - FX_FILESIZE offset = m_dwLastXRefOffset; - if (dwSize < 512 && dwFileLen > 512) { - dwSize = 512; - offset = dwFileLen - 512; - } - pHints->AddSegment(offset, dwSize); - } - } else { - m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; - } - - if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) { - m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; return false; } @@ -649,7 +625,8 @@ bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, size += 512; if (!m_pFileAvail->IsDataAvail(offset, size)) { - pHints->AddSegment(offset, size); + if (pHints) + pHints->AddSegment(offset, size); return false; } return true; @@ -1372,16 +1349,20 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData( DownloadHints* pHints) { if (m_bLinearedDataOK) return DataAvailable; + ASSERT(m_pLinearized); + if (!m_pLinearized->GetLastXRefOffset()) + return DataError; if (!m_bMainXRefLoadTried) { FX_SAFE_UINT32 data_size = m_dwFileLen; - data_size -= m_dwLastXRefOffset; + data_size -= m_pLinearized->GetLastXRefOffset(); if (!data_size.IsValid()) return DataError; - if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, + if (!m_pFileAvail->IsDataAvail(m_pLinearized->GetLastXRefOffset(), data_size.ValueOrDie())) { - pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie()); + pHints->AddSegment(m_pLinearized->GetLastXRefOffset(), + data_size.ValueOrDie()); return DataNotAvailable; } @@ -1440,11 +1421,10 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage( return DataNotAvailable; m_bAnnotsLoad = true; } - - DocAvailStatus nRet = CheckLinearizedData(pHints); - if (nRet == DataAvailable) - m_bPageLoadedOK = false; - return nRet; + const bool is_page_valid = ValidatePage(dwPage); + (void)is_page_valid; + ASSERT(is_page_valid); + return DataAvailable; } bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) { @@ -1543,6 +1523,7 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); if (!m_pPageDict) { ResetFirstCheck(dwPage); + // This is XFA page. return DataAvailable; } @@ -1587,6 +1568,9 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( ResetFirstCheck(dwPage); m_pagesLoadState.insert(dwPage); + const bool is_page_valid = ValidatePage(dwPage); + (void)is_page_valid; + ASSERT(is_page_valid); return DataAvailable; } @@ -1650,6 +1634,9 @@ CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { m_pDocument->ReplaceIndirectObjectIfHigherGeneration( dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument)); } + const bool is_page_valid = ValidatePage(index); + (void)is_page_valid; + ASSERT(is_page_valid); return m_pDocument->GetPage(index); } @@ -1657,6 +1644,13 @@ CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( DownloadHints* pHints) { if (!m_pDocument) return FormAvailable; + if (m_pLinearized) { + DocAvailStatus nDocStatus = CheckLinearizedData(pHints); + if (nDocStatus == DataError) + return FormError; + if (nDocStatus == DataNotAvailable) + return FormNotAvailable; + } if (!m_bLinearizedFormParamLoad) { CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); @@ -1667,14 +1661,7 @@ CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( if (!pAcroForm) return FormNotExist; - DocAvailStatus nDocStatus = CheckLinearizedData(pHints); - if (nDocStatus == DataError) - return FormError; - if (nDocStatus == DataNotAvailable) - return FormNotAvailable; - - if (m_objs_array.empty()) - m_objs_array.push_back(pAcroForm->GetDict()); + m_objs_array.push_back(pAcroForm->GetDict()); m_bLinearizedFormParamLoad = true; } @@ -1685,9 +1672,36 @@ CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( } m_objs_array.clear(); + const bool is_form_valid = ValidateForm(); + (void)is_form_valid; + ASSERT(is_form_valid); return FormAvailable; } +bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) { + FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); + CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); + if (!pPageDict) + return false; + std::vector<CPDF_Object*> obj_array; + obj_array.push_back(pPageDict); + std::vector<CPDF_Object*> dummy; + return AreObjectsAvailable(obj_array, true, nullptr, dummy); +} + +bool CPDF_DataAvail::ValidateForm() { + CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); + if (!pRoot) + return true; + CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm"); + if (!pAcroForm) + return false; + std::vector<CPDF_Object*> obj_array; + obj_array.push_back(pAcroForm); + std::vector<CPDF_Object*> dummy; + return AreObjectsAvailable(obj_array, true, nullptr, dummy); +} + CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {} CPDF_DataAvail::PageNode::~PageNode() { diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h index 250064f38a..0f10ceb6fc 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.h +++ b/core/fpdfapi/parser/cpdf_data_avail.h @@ -22,7 +22,6 @@ class CPDF_Parser; enum PDF_DATAAVAIL_STATUS { PDF_DATAAVAIL_HEADER = 0, PDF_DATAAVAIL_FIRSTPAGE, - PDF_DATAAVAIL_FIRSTPAGE_PREPARE, PDF_DATAAVAIL_HINTTABLE, PDF_DATAAVAIL_END, PDF_DATAAVAIL_CROSSREF, @@ -192,6 +191,8 @@ class CPDF_DataAvail final { bool IsFirstCheck(uint32_t dwPage); void ResetFirstCheck(uint32_t dwPage); bool IsDataAvail(FX_FILESIZE offset, uint32_t size, DownloadHints* pHints); + bool ValidatePage(uint32_t dwPage); + bool ValidateForm(); FileAvail* const m_pFileAvail; IFX_SeekableReadStream* const m_pFileRead; diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index 2d96834964..c43614f628 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -1557,7 +1557,7 @@ CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { m_pSyntax->GetNextChar(ch); while (PDFCharIsWhitespace(ch)) { ++dwCount; - if (m_pSyntax->m_FileLen >= + if (m_pSyntax->m_FileLen <= (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) { break; } diff --git a/fpdfsdk/fpdf_dataavail_embeddertest.cpp b/fpdfsdk/fpdf_dataavail_embeddertest.cpp index 47ba54bcde..cc95c1b177 100644 --- a/fpdfsdk/fpdf_dataavail_embeddertest.cpp +++ b/fpdfsdk/fpdf_dataavail_embeddertest.cpp @@ -45,13 +45,22 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL { return requested_segments_; } - void ClearRequestedSegments() { requested_segments_.clear(); } + size_t max_requested_bound() const { return max_requested_bound_; } + + void ClearRequestedSegments() { + requested_segments_.clear(); + max_requested_bound_ = 0; + } bool is_new_data_available() const { return is_new_data_available_; } void set_is_new_data_available(bool is_new_data_available) { is_new_data_available_ = is_new_data_available; } + size_t max_already_available_bound() const { + return available_ranges_.empty() ? 0 : available_ranges_.rbegin()->second; + } + private: void SetDataAvailable(size_t start, size_t size) { if (size == 0) @@ -109,6 +118,7 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL { void AddSegmentImpl(size_t offset, size_t size) { requested_segments_.push_back(std::make_pair(offset, size)); + max_requested_bound_ = std::max(max_requested_bound_, offset + size); } bool IsDataAvailImpl(size_t offset, size_t size) { @@ -143,6 +153,7 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL { std::unique_ptr<char, pdfium::FreeDeleter> file_contents_; size_t file_length_; std::vector<std::pair<size_t, size_t>> requested_segments_; + size_t max_requested_bound_ = 0; bool is_new_data_available_ = true; using Range = std::pair<size_t, size_t>; @@ -185,3 +196,43 @@ TEST_F(FPDFDataAvailEmbeddertest, LoadUsingHintTables) { EXPECT_TRUE(page); UnloadPage(page); } + +TEST_F(FPDFDataAvailEmbeddertest, + DoNotLoadMainCrossRefForFirstPageIfLinearized) { + TestAsyncLoader loader("feature_linearized_loading.pdf"); + avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); + ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints())); + document_ = FPDFAvail_GetDocument(avail_, nullptr); + ASSERT_TRUE(document_); + const int first_page_num = FPDFAvail_GetFirstPageNum(document_); + + // The main cross ref table should not be processed. + // (It is always at file end) + EXPECT_GT(loader.file_access()->m_FileLen, + loader.max_already_available_bound()); + + // Prevent access to non requested data to coerce the parser to send new + // request for non available (non requested before) data. + loader.set_is_new_data_available(false); + FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()); + + // The main cross ref table should not be requested. + // (It is always at file end) + EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound()); + + // Allow parse page. + loader.set_is_new_data_available(true); + ASSERT_EQ(PDF_DATA_AVAIL, + FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints())); + + // The main cross ref table should not be processed. + // (It is always at file end) + EXPECT_GT(loader.file_access()->m_FileLen, + loader.max_already_available_bound()); + + // Prevent loading data, while page loading. + loader.set_is_new_data_available(false); + FPDF_PAGE page = LoadPage(first_page_num); + EXPECT_TRUE(page); + UnloadPage(page); +} |