diff options
author | art-snake <art-snake@yandex-team.ru> | 2016-11-09 21:32:46 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-11-09 21:32:46 -0800 |
commit | 61f8e9c5aeb0d8cb5477e0248b685214746bada7 (patch) | |
tree | 43c01abc09c72d088b5f79b54fbec37c0a60d5a3 | |
parent | 3f8cb532c93bd2839073ed4949d051245de5a4cb (diff) | |
download | pdfium-61f8e9c5aeb0d8cb5477e0248b685214746bada7.tar.xz |
Do not load main cross ref table for first page in linearized pdf.
For the first page in linearized PDFs, it is not necessary to load the
main xref table, because the first page has its own separate xref table
in the file header.
This decreases the time needed to show the first page in the PDF plugin.
Review-Url: https://codereview.chromium.org/2483633002
-rw-r--r-- | core/fpdfapi/parser/cpdf_data_avail.cpp | 96 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_data_avail.h | 3 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser.cpp | 2 | ||||
-rw-r--r-- | fpdfsdk/fpdf_dataavail_embeddertest.cpp | 53 |
4 files changed, 110 insertions, 44 deletions
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 3fe4b03da6..c1d17a98ed 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -251,7 +251,6 @@ bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) { case PDF_DATAAVAIL_HEADER: return CheckHeader(pHints); case PDF_DATAAVAIL_FIRSTPAGE: - case PDF_DATAAVAIL_FIRSTPAGE_PREPARE: return CheckFirstPage(pHints); case PDF_DATAAVAIL_HINTTABLE: return CheckHintTables(pHints); @@ -594,7 +593,6 @@ bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) { return false; } - bool bNeedDownLoad = false; uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset(); dwEnd += 512; if ((FX_FILESIZE)dwEnd > m_dwFileLen) @@ -604,28 +602,6 @@ bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) { int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { pHints->AddSegment(iStartPos, iSize); - bNeedDownLoad = true; - } - - m_dwLastXRefOffset = m_pLinearized->GetLastXRefOffset(); - FX_FILESIZE dwFileLen = m_pLinearized->GetFileSize(); - if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, - (uint32_t)(dwFileLen - m_dwLastXRefOffset))) { - if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) { - uint32_t dwSize = (uint32_t)(dwFileLen - m_dwLastXRefOffset); - FX_FILESIZE offset = m_dwLastXRefOffset; - if (dwSize < 512 && dwFileLen > 512) { - dwSize = 512; - offset = dwFileLen - 512; - } - pHints->AddSegment(offset, dwSize); - } - } else { - m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; - } - - if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) { - m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; return false; } @@ -649,7 +625,8 @@ bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, size += 512; if (!m_pFileAvail->IsDataAvail(offset, size)) { - pHints->AddSegment(offset, size); + if (pHints) + pHints->AddSegment(offset, size); return false; } return true; @@ -1372,16 +1349,20 @@ 
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData( DownloadHints* pHints) { if (m_bLinearedDataOK) return DataAvailable; + ASSERT(m_pLinearized); + if (!m_pLinearized->GetLastXRefOffset()) + return DataError; if (!m_bMainXRefLoadTried) { FX_SAFE_UINT32 data_size = m_dwFileLen; - data_size -= m_dwLastXRefOffset; + data_size -= m_pLinearized->GetLastXRefOffset(); if (!data_size.IsValid()) return DataError; - if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, + if (!m_pFileAvail->IsDataAvail(m_pLinearized->GetLastXRefOffset(), data_size.ValueOrDie())) { - pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie()); + pHints->AddSegment(m_pLinearized->GetLastXRefOffset(), + data_size.ValueOrDie()); return DataNotAvailable; } @@ -1440,11 +1421,10 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage( return DataNotAvailable; m_bAnnotsLoad = true; } - - DocAvailStatus nRet = CheckLinearizedData(pHints); - if (nRet == DataAvailable) - m_bPageLoadedOK = false; - return nRet; + const bool is_page_valid = ValidatePage(dwPage); + (void)is_page_valid; + ASSERT(is_page_valid); + return DataAvailable; } bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) { @@ -1543,6 +1523,7 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); if (!m_pPageDict) { ResetFirstCheck(dwPage); + // This is XFA page. 
return DataAvailable; } @@ -1587,6 +1568,9 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( ResetFirstCheck(dwPage); m_pagesLoadState.insert(dwPage); + const bool is_page_valid = ValidatePage(dwPage); + (void)is_page_valid; + ASSERT(is_page_valid); return DataAvailable; } @@ -1650,6 +1634,9 @@ CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { m_pDocument->ReplaceIndirectObjectIfHigherGeneration( dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument)); } + const bool is_page_valid = ValidatePage(index); + (void)is_page_valid; + ASSERT(is_page_valid); return m_pDocument->GetPage(index); } @@ -1657,6 +1644,13 @@ CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( DownloadHints* pHints) { if (!m_pDocument) return FormAvailable; + if (m_pLinearized) { + DocAvailStatus nDocStatus = CheckLinearizedData(pHints); + if (nDocStatus == DataError) + return FormError; + if (nDocStatus == DataNotAvailable) + return FormNotAvailable; + } if (!m_bLinearizedFormParamLoad) { CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); @@ -1667,14 +1661,7 @@ CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( if (!pAcroForm) return FormNotExist; - DocAvailStatus nDocStatus = CheckLinearizedData(pHints); - if (nDocStatus == DataError) - return FormError; - if (nDocStatus == DataNotAvailable) - return FormNotAvailable; - - if (m_objs_array.empty()) - m_objs_array.push_back(pAcroForm->GetDict()); + m_objs_array.push_back(pAcroForm->GetDict()); m_bLinearizedFormParamLoad = true; } @@ -1685,9 +1672,36 @@ CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( } m_objs_array.clear(); + const bool is_form_valid = ValidateForm(); + (void)is_form_valid; + ASSERT(is_form_valid); return FormAvailable; } +bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) { + FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); + CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); + if (!pPageDict) + return false; + std::vector<CPDF_Object*> 
obj_array; + obj_array.push_back(pPageDict); + std::vector<CPDF_Object*> dummy; + return AreObjectsAvailable(obj_array, true, nullptr, dummy); +} + +bool CPDF_DataAvail::ValidateForm() { + CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); + if (!pRoot) + return true; + CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm"); + if (!pAcroForm) + return false; + std::vector<CPDF_Object*> obj_array; + obj_array.push_back(pAcroForm); + std::vector<CPDF_Object*> dummy; + return AreObjectsAvailable(obj_array, true, nullptr, dummy); +} + CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {} CPDF_DataAvail::PageNode::~PageNode() { diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h index 250064f38a..0f10ceb6fc 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.h +++ b/core/fpdfapi/parser/cpdf_data_avail.h @@ -22,7 +22,6 @@ class CPDF_Parser; enum PDF_DATAAVAIL_STATUS { PDF_DATAAVAIL_HEADER = 0, PDF_DATAAVAIL_FIRSTPAGE, - PDF_DATAAVAIL_FIRSTPAGE_PREPARE, PDF_DATAAVAIL_HINTTABLE, PDF_DATAAVAIL_END, PDF_DATAAVAIL_CROSSREF, @@ -192,6 +191,8 @@ class CPDF_DataAvail final { bool IsFirstCheck(uint32_t dwPage); void ResetFirstCheck(uint32_t dwPage); bool IsDataAvail(FX_FILESIZE offset, uint32_t size, DownloadHints* pHints); + bool ValidatePage(uint32_t dwPage); + bool ValidateForm(); FileAvail* const m_pFileAvail; IFX_SeekableReadStream* const m_pFileRead; diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index 2d96834964..c43614f628 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -1557,7 +1557,7 @@ CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { m_pSyntax->GetNextChar(ch); while (PDFCharIsWhitespace(ch)) { ++dwCount; - if (m_pSyntax->m_FileLen >= + if (m_pSyntax->m_FileLen <= (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) { break; } diff --git a/fpdfsdk/fpdf_dataavail_embeddertest.cpp 
b/fpdfsdk/fpdf_dataavail_embeddertest.cpp index 47ba54bcde..cc95c1b177 100644 --- a/fpdfsdk/fpdf_dataavail_embeddertest.cpp +++ b/fpdfsdk/fpdf_dataavail_embeddertest.cpp @@ -45,13 +45,22 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL { return requested_segments_; } - void ClearRequestedSegments() { requested_segments_.clear(); } + size_t max_requested_bound() const { return max_requested_bound_; } + + void ClearRequestedSegments() { + requested_segments_.clear(); + max_requested_bound_ = 0; + } bool is_new_data_available() const { return is_new_data_available_; } void set_is_new_data_available(bool is_new_data_available) { is_new_data_available_ = is_new_data_available; } + size_t max_already_available_bound() const { + return available_ranges_.empty() ? 0 : available_ranges_.rbegin()->second; + } + private: void SetDataAvailable(size_t start, size_t size) { if (size == 0) @@ -109,6 +118,7 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL { void AddSegmentImpl(size_t offset, size_t size) { requested_segments_.push_back(std::make_pair(offset, size)); + max_requested_bound_ = std::max(max_requested_bound_, offset + size); } bool IsDataAvailImpl(size_t offset, size_t size) { @@ -143,6 +153,7 @@ class TestAsyncLoader : public FX_DOWNLOADHINTS, FX_FILEAVAIL { std::unique_ptr<char, pdfium::FreeDeleter> file_contents_; size_t file_length_; std::vector<std::pair<size_t, size_t>> requested_segments_; + size_t max_requested_bound_ = 0; bool is_new_data_available_ = true; using Range = std::pair<size_t, size_t>; @@ -185,3 +196,43 @@ TEST_F(FPDFDataAvailEmbeddertest, LoadUsingHintTables) { EXPECT_TRUE(page); UnloadPage(page); } + +TEST_F(FPDFDataAvailEmbeddertest, + DoNotLoadMainCrossRefForFirstPageIfLinearized) { + TestAsyncLoader loader("feature_linearized_loading.pdf"); + avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); + ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints())); + document_ = 
FPDFAvail_GetDocument(avail_, nullptr); + ASSERT_TRUE(document_); + const int first_page_num = FPDFAvail_GetFirstPageNum(document_); + + // The main cross ref table should not be processed. + // (It is always at file end) + EXPECT_GT(loader.file_access()->m_FileLen, + loader.max_already_available_bound()); + + // Prevent access to non requested data to coerce the parser to send new + // request for non available (non requested before) data. + loader.set_is_new_data_available(false); + FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()); + + // The main cross ref table should not be requested. + // (It is always at file end) + EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound()); + + // Allow parse page. + loader.set_is_new_data_available(true); + ASSERT_EQ(PDF_DATA_AVAIL, + FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints())); + + // The main cross ref table should not be processed. + // (It is always at file end) + EXPECT_GT(loader.file_access()->m_FileLen, + loader.max_already_available_bound()); + + // Prevent loading data, while page loading. + loader.set_is_new_data_available(false); + FPDF_PAGE page = LoadPage(first_page_num); + EXPECT_TRUE(page); + UnloadPage(page); +} |