From 5f0e64435c97755a7d309e80ea0a4dad83e76e73 Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Thu, 10 Aug 2017 22:11:59 +0300 Subject: Refactoring of CPDF_DataAvail::GetObject. Use ReadValidator to request exact data on object read. Change-Id: I1d1863097fa2b037e1bb2e4e89b93d26c5d8e066 Reviewed-on: https://pdfium-review.googlesource.com/10510 Commit-Queue: Art Snake Reviewed-by: dsinclair --- core/fpdfapi/parser/cpdf_data_avail.cpp | 96 ++++++++++++++------------------- core/fpdfapi/parser/cpdf_data_avail.h | 27 ++++------ 2 files changed, 50 insertions(+), 73 deletions(-) diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 2f17345ab6..ed5b2880b5 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -265,10 +265,10 @@ bool CPDF_DataAvail::CheckAcroFormSubObject() { return true; } -bool CPDF_DataAvail::CheckAcroForm(DownloadHints* pHints) { +bool CPDF_DataAvail::CheckAcroForm() { bool bExist = false; std::unique_ptr pAcroForm = - GetObject(m_dwAcroFormObjNum, pHints, &bExist); + GetObject(m_dwAcroFormObjNum, &bExist); if (!bExist) { m_docStatus = PDF_DATAAVAIL_PAGETREE; return true; @@ -312,18 +312,18 @@ bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) { case PDF_DATAAVAIL_LOADALLFILE: return LoadAllFile(pHints); case PDF_DATAAVAIL_ROOT: - return CheckRoot(pHints); + return CheckRoot(); case PDF_DATAAVAIL_INFO: - return CheckInfo(pHints); + return CheckInfo(); case PDF_DATAAVAIL_ACROFORM: - return CheckAcroForm(pHints); + return CheckAcroForm(); case PDF_DATAAVAIL_PAGETREE: if (m_bTotalLoadPageTree) - return CheckPages(pHints); - return LoadDocPages(pHints); + return CheckPages(); + return LoadDocPages(); case PDF_DATAAVAIL_PAGE: if (m_bTotalLoadPageTree) - return CheckPage(pHints); + return CheckPage(); m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD; return true; case PDF_DATAAVAIL_ERROR: @@ -339,9 +339,9 @@ bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) { bool CPDF_DataAvail::CheckPageStatus(DownloadHints* pHints) { switch (m_docStatus) { case PDF_DATAAVAIL_PAGETREE: - return CheckPages(pHints); + return CheckPages(); case PDF_DATAAVAIL_PAGE: - return CheckPage(pHints); + return CheckPage(); case PDF_DATAAVAIL_ERROR: return LoadAllFile(pHints); default: @@ -377,30 +377,21 @@ bool CPDF_DataAvail::LoadAllXref(DownloadHints* pHints) { } std::unique_ptr CPDF_DataAvail::GetObject(uint32_t objnum, - DownloadHints* pHints, bool* pExistInFile) { - uint32_t size = 0; - FX_FILESIZE offset = 0; CPDF_Parser* pParser = nullptr; if (pExistInFile) *pExistInFile = true; - if (m_pDocument) { - size = GetObjectSize(objnum, offset); - pParser = m_pDocument->GetParser(); - } else { - size = (uint32_t)m_parser.GetObjectSize(objnum); - offset = m_parser.GetObjectOffset(objnum); - pParser = &m_parser; - } - - if (!IsDataAvail(offset, size, pHints)) - return nullptr; + pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser; std::unique_ptr pRet; - if (pParser) + if (pParser) { + const CPDF_ReadValidator::Session read_session(GetValidator().Get()); pRet = pParser->ParseIndirectObject(nullptr, objnum); + if (GetValidator()->has_read_problems()) + return nullptr; + } if (!pRet && pExistInFile) *pExistInFile = false; @@ -408,10 +399,9 @@ std::unique_ptr CPDF_DataAvail::GetObject(uint32_t objnum, return pRet; } -bool CPDF_DataAvail::CheckInfo(DownloadHints* pHints) { +bool CPDF_DataAvail::CheckInfo() { bool bExist = false; - std::unique_ptr pInfo = - GetObject(m_dwInfoObjNum, pHints, &bExist); + std::unique_ptr pInfo = GetObject(m_dwInfoObjNum, &bExist); if (bExist && !pInfo) { if (m_docStatus == PDF_DATAAVAIL_ERROR) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; @@ -426,9 +416,9 @@ bool CPDF_DataAvail::CheckInfo(DownloadHints* pHints) { return true; } -bool CPDF_DataAvail::CheckRoot(DownloadHints* pHints) { +bool CPDF_DataAvail::CheckRoot() { bool bExist = false; - m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist); + m_pRoot = GetObject(m_dwRootObjNum, &bExist); if (!bExist) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return true; @@ -494,12 +484,11 @@ void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) { m_pageMapCheckState.erase(dwPage); } -bool CPDF_DataAvail::CheckPage(DownloadHints* pHints) { +bool CPDF_DataAvail::CheckPage() { std::vector UnavailObjList; for (uint32_t dwPageObjNum : m_PageObjList) { bool bExists = false; - std::unique_ptr pObj = - GetObject(dwPageObjNum, pHints, &bExists); + std::unique_ptr pObj = GetObject(dwPageObjNum, &bExists); if (!pObj) { if (bExists) UnavailObjList.push_back(dwPageObjNum); @@ -572,10 +561,9 @@ bool CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) { return true; } -bool CPDF_DataAvail::CheckPages(DownloadHints* pHints) { +bool CPDF_DataAvail::CheckPages() { bool bExists = false; - std::unique_ptr pPages = - GetObject(m_PagesObjNum, pHints, &bExists); + std::unique_ptr pPages = GetObject(m_PagesObjNum, &bExists); if (!bExists) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return true; @@ -1118,11 +1106,11 @@ bool CPDF_DataAvail::CheckPage(uint32_t dwPage, DownloadHints* pHints) { while (true) { switch (m_docStatus) { case PDF_DATAAVAIL_PAGETREE: - if (!LoadDocPages(pHints)) + if (!LoadDocPages()) return false; break; case PDF_DATAAVAIL_PAGE: - if (!LoadDocPage(dwPage, pHints)) + if (!LoadDocPage(dwPage)) return false; break; case PDF_DATAAVAIL_ERROR: @@ -1138,10 +1126,9 @@ bool CPDF_DataAvail::CheckPage(uint32_t dwPage, DownloadHints* pHints) { } bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo, - PageNode* pPageNode, - DownloadHints* pHints) { + PageNode* pPageNode) { bool bExists = false; - std::unique_ptr pPages = GetObject(dwPageNo, pHints, &bExists); + std::unique_ptr pPages = GetObject(dwPageNo, &bExists); if (!bExists) { m_docStatus = PDF_DATAAVAIL_ERROR; return false; @@ -1170,10 +1157,9 @@ bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo, } bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo, - PageNode* pPageNode, - DownloadHints* pHints) { + PageNode* pPageNode) { bool bExists = false; - std::unique_ptr pPage = GetObject(dwPageNo, pHints, &bExists); + std::unique_ptr pPage = GetObject(dwPageNo, &bExists); if (!bExists) { m_docStatus = PDF_DATAAVAIL_ERROR; return false; @@ -1243,7 +1229,6 @@ bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo, bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode, int32_t iPage, int32_t& iCount, - DownloadHints* pHints, int level) { if (level >= kMaxPageRecursionDepth) return false; @@ -1260,12 +1245,12 @@ bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode, if (pNode->m_type == PDF_PAGENODE_UNKNOWN) { // Updates the type for the unknown page node. - if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode, pHints)) + if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode)) return false; } if (pNode->m_type == PDF_PAGENODE_ARRAY) { // Updates a more specific type for the array page node. - if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints)) + if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode)) return false; } switch (pNode->m_type) { @@ -1275,7 +1260,7 @@ bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode, m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo); break; case PDF_PAGENODE_PAGES: - if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1)) + if (!CheckPageNode(*pNode, iPage, iCount, level + 1)) return false; break; case PDF_PAGENODE_UNKNOWN: @@ -1291,7 +1276,7 @@ bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode, return true; } -bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage, DownloadHints* pHints) { +bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) { FX_SAFE_INT32 safePage = pdfium::base::checked_cast(dwPage); int32_t iPage = safePage.ValueOrDie(); if (m_pDocument->GetPageCount() <= iPage || @@ -1304,13 +1289,12 @@ bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage, DownloadHints* pHints) { return true; } int32_t iCount = -1; - return CheckPageNode(m_PageNode, iPage, iCount, pHints, 0); + return CheckPageNode(m_PageNode, iPage, iCount, 0); } -bool CPDF_DataAvail::CheckPageCount(DownloadHints* pHints) { +bool CPDF_DataAvail::CheckPageCount() { bool bExists = false; - std::unique_ptr pPages = - GetObject(m_PagesObjNum, pHints, &bExists); + std::unique_ptr pPages = GetObject(m_PagesObjNum, &bExists); if (!bExists) { m_docStatus = PDF_DATAAVAIL_ERROR; return false; @@ -1329,11 +1313,11 @@ bool CPDF_DataAvail::CheckPageCount(DownloadHints* pHints) { return pPagesDict->GetIntegerFor("Count") > 0; } -bool CPDF_DataAvail::LoadDocPages(DownloadHints* pHints) { - if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode, pHints)) +bool CPDF_DataAvail::LoadDocPages() { + if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode)) return false; - if (CheckPageCount(pHints)) { + if (CheckPageCount()) { m_docStatus = PDF_DATAAVAIL_PAGE; return true; } diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h index b857c659d9..f15f7c0e1d 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.h +++ b/core/fpdfapi/parser/cpdf_data_avail.h @@ -138,13 +138,12 @@ class CPDF_DataAvail final { bool CheckCrossRef(DownloadHints* pHints); bool CheckCrossRefItem(); bool CheckTrailer(DownloadHints* pHints); - bool CheckRoot(DownloadHints* pHints); - bool CheckInfo(DownloadHints* pHints); - bool CheckPages(DownloadHints* pHints); - bool CheckPage(DownloadHints* pHints); + bool CheckRoot(); + bool CheckInfo(); + bool CheckPages(); + bool CheckPage(); bool CheckResources(); - bool CheckAnnots(DownloadHints* pHints); - bool CheckAcroForm(DownloadHints* pHints); + bool CheckAcroForm(); bool CheckAcroFormSubObject(); bool CheckTrailerAppend(DownloadHints* pHints); bool CheckPageStatus(DownloadHints* pHints); @@ -160,7 +159,6 @@ class CPDF_DataAvail final { uint32_t objnum, CPDF_IndirectObjectHolder* pObjList = nullptr); std::unique_ptr GetObject(uint32_t objnum, - DownloadHints* pHints, bool* pExistInFile); bool GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages); bool PreparePageItem(); @@ -172,20 +170,15 @@ class CPDF_DataAvail final { DocAvailStatus CheckLinearizedFirstPage(uint32_t dwPage); bool CheckPage(uint32_t dwPage, DownloadHints* pHints); - bool LoadDocPages(DownloadHints* pHints); - bool LoadDocPage(uint32_t dwPage, DownloadHints* pHints); + bool LoadDocPages(); + bool LoadDocPage(uint32_t dwPage); bool CheckPageNode(const PageNode& pageNode, int32_t iPage, int32_t& iCount, - DownloadHints* pHints, int level); - bool CheckUnknownPageNode(uint32_t dwPageNo, - PageNode* pPageNode, - DownloadHints* pHints); - bool CheckArrayPageNode(uint32_t dwPageNo, - PageNode* pPageNode, - DownloadHints* pHints); - bool CheckPageCount(DownloadHints* pHints); + bool CheckUnknownPageNode(uint32_t dwPageNo, PageNode* pPageNode); + bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode); + bool CheckPageCount(); bool IsFirstCheck(uint32_t dwPage); void ResetFirstCheck(uint32_t dwPage); bool ValidatePage(uint32_t dwPage); -- cgit v1.2.3