// Copyright 2016 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "core/fpdfapi/fpdf_parser/include/ipdf_data_avail.h" #include "core/fpdfapi/fpdf_parser/cpdf_data_avail.h" #include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_array.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_document.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_name.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_number.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_reference.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_stream.h" #include "core/fpdfapi/include/cpdf_modulemgr.h" #include "core/fxcrt/include/fx_ext.h" #include "core/fxcrt/include/fx_safe_types.h" #include "third_party/base/stl_util.h" IPDF_DataAvail::IPDF_DataAvail(IPDF_DataAvail::FileAvail* pFileAvail, IFX_FileRead* pFileRead) : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {} IPDF_DataAvail::~IPDF_DataAvail() {} IPDF_DataAvail::FileAvail::~FileAvail() {} IPDF_DataAvail::DownloadHints::~DownloadHints() {} // static IPDF_DataAvail* IPDF_DataAvail::Create(IPDF_DataAvail::FileAvail* pFileAvail, IFX_FileRead* pFileRead) { return new CPDF_DataAvail(pFileAvail, pFileRead, TRUE); } // static int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0; CPDF_DataAvail::CPDF_DataAvail(IPDF_DataAvail::FileAvail* pFileAvail, IFX_FileRead* pFileRead, FX_BOOL bSupportHintTable) : IPDF_DataAvail(pFileAvail, pFileRead) { m_Pos = 0; m_dwFileLen = 0; if (m_pFileRead) { m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize(); } m_dwCurrentOffset = 0; m_dwXRefOffset = 0; m_bufferOffset = 0; m_dwFirstPageNo = 0; m_bufferSize = 0; m_PagesObjNum = 0; m_dwCurrentXRefSteam = 0; m_dwAcroFormObjNum = 0; m_dwInfoObjNum = 0; m_pDocument = 0; m_dwEncryptObjNum = 0; m_dwPrevXRefOffset = 0; m_dwLastXRefOffset = 0; m_bDocAvail = FALSE; m_bMainXRefLoadTried = FALSE; m_bDocAvail = FALSE; m_bLinearized = FALSE; m_bPagesLoad = FALSE; m_bPagesTreeLoad = FALSE; m_bMainXRefLoadedOK = FALSE; m_bAnnotsLoad = FALSE; m_bHaveAcroForm = FALSE; m_bAcroFormLoad = FALSE; m_bPageLoadedOK = FALSE; m_bNeedDownLoadResource = FALSE; m_bLinearizedFormParamLoad = FALSE; m_pLinearized = NULL; m_pRoot = NULL; m_pTrailer = NULL; m_pCurrentParser = NULL; m_pAcroForm = NULL; m_pPageDict = NULL; m_pPageResource = NULL; m_docStatus = PDF_DATAAVAIL_HEADER; m_parser.m_bOwnFileRead = false; m_bTotalLoadPageTree = FALSE; m_bCurPageDictLoadOK = FALSE; m_bLinearedDataOK = FALSE; m_bSupportHintTable = bSupportHintTable; } CPDF_DataAvail::~CPDF_DataAvail() { if (m_pLinearized) m_pLinearized->Release(); if (m_pRoot) m_pRoot->Release(); if (m_pTrailer) m_pTrailer->Release(); int iSize = m_arrayAcroforms.GetSize(); for (int i = 0; i < iSize; ++i) m_arrayAcroforms.GetAt(i)->Release(); } void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc) { m_pDocument = pDoc; } FX_DWORD CPDF_DataAvail::GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset) { CPDF_Parser* pParser = m_pDocument->GetParser(); if (!pParser || !pParser->IsValidObjectNumber(objnum)) return 0; if (pParser->GetObjectType(objnum) == 2) objnum = pParser->GetObjectPositionOrZero(objnum); if (pParser->GetObjectType(objnum) != 1 && pParser->GetObjectType(objnum) != 255) { return 0; } offset = pParser->GetObjectPositionOrZero(objnum); if (offset == 0) return 0; auto it = pParser->m_SortedOffset.find(offset); if (it == pParser->m_SortedOffset.end() || ++it == pParser->m_SortedOffset.end()) { return 0; } return *it - offset; } FX_BOOL CPDF_DataAvail::IsObjectsAvail( CFX_ArrayTemplate& obj_array, FX_BOOL bParsePage, IPDF_DataAvail::DownloadHints* pHints, CFX_ArrayTemplate& ret_array) { if (!obj_array.GetSize()) return TRUE; FX_DWORD count = 0; CFX_ArrayTemplate new_obj_array; int32_t i = 0; for (i = 0; i < obj_array.GetSize(); i++) { CPDF_Object* pObj = obj_array[i]; if (!pObj) continue; int32_t type = pObj->GetType(); switch (type) { case CPDF_Object::ARRAY: { CPDF_Array* pArray = pObj->GetArray(); for (FX_DWORD k = 0; k < pArray->GetCount(); ++k) new_obj_array.Add(pArray->GetElement(k)); } break; case CPDF_Object::STREAM: pObj = pObj->GetDict(); case CPDF_Object::DICTIONARY: { CPDF_Dictionary* pDict = pObj->GetDict(); if (pDict && pDict->GetStringBy("Type") == "Page" && !bParsePage) continue; for (const auto& it : *pDict) { const CFX_ByteString& key = it.first; CPDF_Object* value = it.second; if (key != "Parent") new_obj_array.Add(value); } } break; case CPDF_Object::REFERENCE: { CPDF_Reference* pRef = pObj->AsReference(); FX_DWORD dwNum = pRef->GetRefObjNum(); FX_FILESIZE offset; FX_DWORD size = GetObjectSize(dwNum, offset); if (size == 0 || offset < 0 || offset >= m_dwFileLen) break; if (!IsDataAvail(offset, size, pHints)) { ret_array.Add(pObj); count++; } else if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) { m_ObjectSet.insert(dwNum); CPDF_Object* pReferred = m_pDocument->GetIndirectObject(pRef->GetRefObjNum()); if (pReferred) new_obj_array.Add(pReferred); } } break; } } if (count > 0) { int32_t iSize = new_obj_array.GetSize(); for (i = 0; i < iSize; ++i) { CPDF_Object* pObj = new_obj_array[i]; if (CPDF_Reference* pRef = pObj->AsReference()) { FX_DWORD dwNum = pRef->GetRefObjNum(); if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) ret_array.Add(pObj); } else { ret_array.Add(pObj); } } return FALSE; } obj_array.RemoveAll(); obj_array.Append(new_obj_array); return IsObjectsAvail(obj_array, FALSE, pHints, ret_array); } IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail( IPDF_DataAvail::DownloadHints* pHints) { if (!m_dwFileLen && m_pFileRead) { m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize(); if (!m_dwFileLen) return DataError; } while (!m_bDocAvail) { if (!CheckDocStatus(pHints)) return DataNotAvailable; } return DataAvailable; } FX_BOOL CPDF_DataAvail::CheckAcroFormSubObject( IPDF_DataAvail::DownloadHints* pHints) { if (!m_objs_array.GetSize()) { m_objs_array.RemoveAll(); m_ObjectSet.clear(); CFX_ArrayTemplate obj_array; obj_array.Append(m_arrayAcroforms); FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array); if (bRet) m_objs_array.RemoveAll(); return bRet; } CFX_ArrayTemplate new_objs_array; FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); if (bRet) { int32_t iSize = m_arrayAcroforms.GetSize(); for (int32_t i = 0; i < iSize; ++i) { m_arrayAcroforms.GetAt(i)->Release(); } m_arrayAcroforms.RemoveAll(); } else { m_objs_array.RemoveAll(); m_objs_array.Append(new_objs_array); } return bRet; } FX_BOOL CPDF_DataAvail::CheckAcroForm(IPDF_DataAvail::DownloadHints* pHints) { FX_BOOL bExist = FALSE; m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist); if (!bExist) { m_docStatus = PDF_DATAAVAIL_PAGETREE; return TRUE; } if (!m_pAcroForm) { if (m_docStatus == PDF_DATAAVAIL_ERROR) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return TRUE; } return FALSE; } m_arrayAcroforms.Add(m_pAcroForm); m_docStatus = PDF_DATAAVAIL_PAGETREE; return TRUE; } FX_BOOL CPDF_DataAvail::CheckDocStatus(IPDF_DataAvail::DownloadHints* pHints) { switch (m_docStatus) { case PDF_DATAAVAIL_HEADER: return CheckHeader(pHints); case PDF_DATAAVAIL_FIRSTPAGE: case PDF_DATAAVAIL_FIRSTPAGE_PREPARE: return CheckFirstPage(pHints); case PDF_DATAAVAIL_HINTTABLE: return CheckHintTables(pHints); case PDF_DATAAVAIL_END: return CheckEnd(pHints); case PDF_DATAAVAIL_CROSSREF: return CheckCrossRef(pHints); case PDF_DATAAVAIL_CROSSREF_ITEM: return CheckCrossRefItem(pHints); case PDF_DATAAVAIL_CROSSREF_STREAM: return CheckAllCrossRefStream(pHints); case PDF_DATAAVAIL_TRAILER: return CheckTrailer(pHints); case PDF_DATAAVAIL_TRAILER_APPEND: return CheckTrailerAppend(pHints); case PDF_DATAAVAIL_LOADALLCROSSREF: return LoadAllXref(pHints); case PDF_DATAAVAIL_LOADALLFILE: return LoadAllFile(pHints); case PDF_DATAAVAIL_ROOT: return CheckRoot(pHints); case PDF_DATAAVAIL_INFO: return CheckInfo(pHints); case PDF_DATAAVAIL_ACROFORM: return CheckAcroForm(pHints); case PDF_DATAAVAIL_PAGETREE: if (m_bTotalLoadPageTree) return CheckPages(pHints); return LoadDocPages(pHints); case PDF_DATAAVAIL_PAGE: if (m_bTotalLoadPageTree) return CheckPage(pHints); m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD; return TRUE; case PDF_DATAAVAIL_ERROR: return LoadAllFile(pHints); case PDF_DATAAVAIL_PAGE_LATERLOAD: m_docStatus = PDF_DATAAVAIL_PAGE; default: m_bDocAvail = TRUE; return TRUE; } } FX_BOOL CPDF_DataAvail::CheckPageStatus(IPDF_DataAvail::DownloadHints* pHints) { switch (m_docStatus) { case PDF_DATAAVAIL_PAGETREE: return CheckPages(pHints); case PDF_DATAAVAIL_PAGE: return CheckPage(pHints); case PDF_DATAAVAIL_ERROR: return LoadAllFile(pHints); default: m_bPagesTreeLoad = TRUE; m_bPagesLoad = TRUE; return TRUE; } } FX_BOOL CPDF_DataAvail::LoadAllFile(IPDF_DataAvail::DownloadHints* pHints) { if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) { m_docStatus = PDF_DATAAVAIL_DONE; return TRUE; } pHints->AddSegment(0, (FX_DWORD)m_dwFileLen); return FALSE; } FX_BOOL CPDF_DataAvail::LoadAllXref(IPDF_DataAvail::DownloadHints* pHints) { m_parser.m_pSyntax->InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset); m_parser.m_bOwnFileRead = false; if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return FALSE; } m_dwRootObjNum = m_parser.GetRootObjNum(); m_dwInfoObjNum = m_parser.GetInfoObjNum(); m_pCurrentParser = &m_parser; m_docStatus = PDF_DATAAVAIL_ROOT; return TRUE; } CPDF_Object* CPDF_DataAvail::GetObject(FX_DWORD objnum, IPDF_DataAvail::DownloadHints* pHints, FX_BOOL* pExistInFile) { CPDF_Object* pRet = nullptr; FX_DWORD size = 0; FX_FILESIZE offset = 0; CPDF_Parser* pParser = nullptr; if (pExistInFile) *pExistInFile = TRUE; if (m_pDocument) { size = GetObjectSize(objnum, offset); pParser = m_pDocument->GetParser(); } else { size = (FX_DWORD)m_parser.GetObjectSize(objnum); offset = m_parser.GetObjectOffset(objnum); pParser = &m_parser; } if (!IsDataAvail(offset, size, pHints)) return nullptr; if (pParser) pRet = pParser->ParseIndirectObject(nullptr, objnum); if (!pRet && pExistInFile) *pExistInFile = FALSE; return pRet; } FX_BOOL CPDF_DataAvail::CheckInfo(IPDF_DataAvail::DownloadHints* pHints) { FX_BOOL bExist = FALSE; CPDF_Object* pInfo = GetObject(m_dwInfoObjNum, pHints, &bExist); if (!bExist) { m_docStatus = (m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE); return TRUE; } if (!pInfo) { if (m_docStatus == PDF_DATAAVAIL_ERROR) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return TRUE; } if (m_Pos == m_dwFileLen) m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } if (pInfo) pInfo->Release(); m_docStatus = (m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE); return TRUE; } FX_BOOL CPDF_DataAvail::CheckRoot(IPDF_DataAvail::DownloadHints* pHints) { FX_BOOL bExist = FALSE; m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist); if (!bExist) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return TRUE; } if (!m_pRoot) { if (m_docStatus == PDF_DATAAVAIL_ERROR) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return TRUE; } return FALSE; } CPDF_Dictionary* pDict = m_pRoot->GetDict(); if (!pDict) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } CPDF_Reference* pRef = ToReference(pDict->GetElement("Pages")); if (!pRef) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } m_PagesObjNum = pRef->GetRefObjNum(); CPDF_Reference* pAcroFormRef = ToReference(m_pRoot->GetDict()->GetElement("AcroForm")); if (pAcroFormRef) { m_bHaveAcroForm = TRUE; m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum(); } if (m_dwInfoObjNum) { m_docStatus = PDF_DATAAVAIL_INFO; } else { m_docStatus = m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE; } return TRUE; } FX_BOOL CPDF_DataAvail::PreparePageItem() { CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); CPDF_Reference* pRef = ToReference(pRoot ? pRoot->GetElement("Pages") : nullptr); if (!pRef) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } m_PagesObjNum = pRef->GetRefObjNum(); m_pCurrentParser = m_pDocument->GetParser(); m_docStatus = PDF_DATAAVAIL_PAGETREE; return TRUE; } bool CPDF_DataAvail::IsFirstCheck(int iPage) { return m_pageMapCheckState.insert(iPage).second; } void CPDF_DataAvail::ResetFirstCheck(int iPage) { m_pageMapCheckState.erase(iPage); } FX_BOOL CPDF_DataAvail::CheckPage(IPDF_DataAvail::DownloadHints* pHints) { FX_DWORD iPageObjs = m_PageObjList.GetSize(); CFX_ArrayTemplate UnavailObjList; for (FX_DWORD i = 0; i < iPageObjs; ++i) { FX_DWORD dwPageObjNum = m_PageObjList.GetAt(i); FX_BOOL bExist = FALSE; CPDF_Object* pObj = GetObject(dwPageObjNum, pHints, &bExist); if (!pObj) { if (bExist) UnavailObjList.Add(dwPageObjNum); continue; } if (pObj->IsArray()) { CPDF_Array* pArray = pObj->GetArray(); if (pArray) { int32_t iSize = pArray->GetCount(); for (int32_t j = 0; j < iSize; ++j) { if (CPDF_Reference* pRef = ToReference(pArray->GetElement(j))) UnavailObjList.Add(pRef->GetRefObjNum()); } } } if (!pObj->IsDictionary()) { pObj->Release(); continue; } CFX_ByteString type = pObj->GetDict()->GetStringBy("Type"); if (type == "Pages") { m_PagesArray.Add(pObj); continue; } pObj->Release(); } m_PageObjList.RemoveAll(); if (UnavailObjList.GetSize()) { m_PageObjList.Append(UnavailObjList); return FALSE; } FX_DWORD iPages = m_PagesArray.GetSize(); for (FX_DWORD i = 0; i < iPages; i++) { CPDF_Object* pPages = m_PagesArray.GetAt(i); if (!pPages) continue; if (!GetPageKids(m_pCurrentParser, pPages)) { pPages->Release(); while (++i < iPages) { pPages = m_PagesArray.GetAt(i); pPages->Release(); } m_PagesArray.RemoveAll(); m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } pPages->Release(); } m_PagesArray.RemoveAll(); if (!m_PageObjList.GetSize()) m_docStatus = PDF_DATAAVAIL_DONE; return TRUE; } FX_BOOL CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) { if (!pParser) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } CPDF_Dictionary* pDict = pPages->GetDict(); CPDF_Object* pKids = pDict ? pDict->GetElement("Kids") : NULL; if (!pKids) return TRUE; switch (pKids->GetType()) { case CPDF_Object::REFERENCE: m_PageObjList.Add(pKids->AsReference()->GetRefObjNum()); break; case CPDF_Object::ARRAY: { CPDF_Array* pKidsArray = pKids->AsArray(); for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) { if (CPDF_Reference* pRef = ToReference(pKidsArray->GetElement(i))) m_PageObjList.Add(pRef->GetRefObjNum()); } } break; default: m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } return TRUE; } FX_BOOL CPDF_DataAvail::CheckPages(IPDF_DataAvail::DownloadHints* pHints) { FX_BOOL bExist = FALSE; CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist); if (!bExist) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return TRUE; } if (!pPages) { if (m_docStatus == PDF_DATAAVAIL_ERROR) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return TRUE; } return FALSE; } if (!GetPageKids(m_pCurrentParser, pPages)) { pPages->Release(); m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } pPages->Release(); m_docStatus = PDF_DATAAVAIL_PAGE; return TRUE; } FX_BOOL CPDF_DataAvail::CheckHeader(IPDF_DataAvail::DownloadHints* pHints) { FX_DWORD req_size = 1024; if ((FX_FILESIZE)req_size > m_dwFileLen) req_size = (FX_DWORD)m_dwFileLen; if (m_pFileAvail->IsDataAvail(0, req_size)) { uint8_t buffer[1024]; m_pFileRead->ReadBlock(buffer, 0, req_size); if (IsLinearizedFile(buffer, req_size)) { m_docStatus = PDF_DATAAVAIL_FIRSTPAGE; } else { if (m_docStatus == PDF_DATAAVAIL_ERROR) return FALSE; m_docStatus = PDF_DATAAVAIL_END; } return TRUE; } pHints->AddSegment(0, req_size); return FALSE; } FX_BOOL CPDF_DataAvail::CheckFirstPage(IPDF_DataAvail::DownloadHints* pHints) { CPDF_Dictionary* pDict = m_pLinearized->GetDict(); CPDF_Object* pEndOffSet = pDict ? pDict->GetElement("E") : NULL; if (!pEndOffSet) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } CPDF_Object* pXRefOffset = pDict ? pDict->GetElement("T") : NULL; if (!pXRefOffset) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } CPDF_Object* pFileLen = pDict ? pDict->GetElement("L") : NULL; if (!pFileLen) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } FX_BOOL bNeedDownLoad = FALSE; if (pEndOffSet->IsNumber()) { FX_DWORD dwEnd = pEndOffSet->GetInteger(); dwEnd += 512; if ((FX_FILESIZE)dwEnd > m_dwFileLen) dwEnd = (FX_DWORD)m_dwFileLen; int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { pHints->AddSegment(iStartPos, iSize); bNeedDownLoad = TRUE; } } m_dwLastXRefOffset = 0; FX_FILESIZE dwFileLen = 0; if (pXRefOffset->IsNumber()) m_dwLastXRefOffset = pXRefOffset->GetInteger(); if (pFileLen->IsNumber()) dwFileLen = pFileLen->GetInteger(); if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, (FX_DWORD)(dwFileLen - m_dwLastXRefOffset))) { if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) { FX_DWORD dwSize = (FX_DWORD)(dwFileLen - m_dwLastXRefOffset); FX_FILESIZE offset = m_dwLastXRefOffset; if (dwSize < 512 && dwFileLen > 512) { dwSize = 512; offset = dwFileLen - 512; } pHints->AddSegment(offset, dwSize); } } else { m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; } if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) { m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; return FALSE; } m_docStatus = m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE; return TRUE; } FX_BOOL CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, FX_DWORD size, IPDF_DataAvail::DownloadHints* pHints) { if (offset > m_dwFileLen) return TRUE; FX_SAFE_DWORD safeSize = pdfium::base::checked_cast(offset); safeSize += size; safeSize += 512; if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen) size = m_dwFileLen - offset; else size += 512; if (!m_pFileAvail->IsDataAvail(offset, size)) { pHints->AddSegment(offset, size); return FALSE; } return TRUE; } FX_BOOL CPDF_DataAvail::CheckHintTables(IPDF_DataAvail::DownloadHints* pHints) { CPDF_Dictionary* pDict = m_pLinearized->GetDict(); if (!pDict) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } if (!pDict->KeyExist("H") || !pDict->KeyExist("O") || !pDict->KeyExist("N")) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } int nPageCount = pDict->GetElementValue("N")->GetInteger(); if (nPageCount <= 1) { m_docStatus = PDF_DATAAVAIL_DONE; return TRUE; } CPDF_Array* pHintStreamRange = pDict->GetArrayBy("H"); FX_FILESIZE szHSStart = pHintStreamRange->GetElementValue(0) ? pHintStreamRange->GetElementValue(0)->GetInteger() : 0; FX_FILESIZE szHSLength = pHintStreamRange->GetElementValue(1) ? pHintStreamRange->GetElementValue(1)->GetInteger() : 0; if (szHSStart < 0 || szHSLength <= 0) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } if (!IsDataAvail(szHSStart, szHSLength, pHints)) return FALSE; m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset); std::unique_ptr pHintTables( new CPDF_HintTables(this, pDict)); std::unique_ptr> pHintStream( ParseIndirectObjectAt(szHSStart, 0)); CPDF_Stream* pStream = ToStream(pHintStream.get()); if (pStream && pHintTables->LoadHintStream(pStream)) m_pHintTables = std::move(pHintTables); m_docStatus = PDF_DATAAVAIL_DONE; return TRUE; } CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt( FX_FILESIZE pos, FX_DWORD objnum, CPDF_IndirectObjectHolder* pObjList) { FX_FILESIZE SavedPos = m_syntaxParser.SavePos(); m_syntaxParser.RestorePos(pos); bool bIsNumber; CFX_ByteString word = m_syntaxParser.GetNextWord(&bIsNumber); if (!bIsNumber) return nullptr; FX_DWORD parser_objnum = FXSYS_atoui(word); if (objnum && parser_objnum != objnum) return nullptr; word = m_syntaxParser.GetNextWord(&bIsNumber); if (!bIsNumber) return nullptr; FX_DWORD gennum = FXSYS_atoui(word); if (m_syntaxParser.GetKeyword() != "obj") { m_syntaxParser.RestorePos(SavedPos); return nullptr; } CPDF_Object* pObj = m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, true); m_syntaxParser.RestorePos(SavedPos); return pObj; } IPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() { FX_DWORD req_size = 1024; if (!m_pFileAvail->IsDataAvail(0, req_size)) return LinearizationUnknown; if (!m_pFileRead) return NotLinearized; FX_FILESIZE dwSize = m_pFileRead->GetSize(); if (dwSize < (FX_FILESIZE)req_size) return LinearizationUnknown; uint8_t buffer[1024]; m_pFileRead->ReadBlock(buffer, 0, req_size); if (IsLinearizedFile(buffer, req_size)) return Linearized; return NotLinearized; } FX_BOOL CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen) { ScopedFileStream file(FX_CreateMemoryStream(pData, (size_t)dwLen, FALSE)); int32_t offset = GetHeaderOffset(file.get()); if (offset == -1) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } m_dwHeaderOffset = offset; m_syntaxParser.InitParser(file.get(), offset); m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9); bool bNumber; CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber); if (!bNumber) return FALSE; FX_DWORD objnum = FXSYS_atoui(wordObjNum); if (m_pLinearized) { m_pLinearized->Release(); m_pLinearized = nullptr; } m_pLinearized = ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum); if (!m_pLinearized) return FALSE; CPDF_Dictionary* pDict = m_pLinearized->GetDict(); if (pDict && pDict->GetElement("Linearized")) { CPDF_Object* pLen = pDict->GetElement("L"); if (!pLen) return FALSE; if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize()) return FALSE; m_bLinearized = TRUE; if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P"))) m_dwFirstPageNo = pNo->GetInteger(); return TRUE; } return FALSE; } FX_BOOL CPDF_DataAvail::CheckEnd(IPDF_DataAvail::DownloadHints* pHints) { FX_DWORD req_pos = (FX_DWORD)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0); FX_DWORD dwSize = (FX_DWORD)(m_dwFileLen - req_pos); if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) { uint8_t buffer[1024]; m_pFileRead->ReadBlock(buffer, req_pos, dwSize); ScopedFileStream file(FX_CreateMemoryStream(buffer, (size_t)dwSize, FALSE)); m_syntaxParser.InitParser(file.get(), 0); m_syntaxParser.RestorePos(dwSize - 1); if (m_syntaxParser.SearchWord("startxref", TRUE, FALSE, dwSize)) { m_syntaxParser.GetNextWord(nullptr); bool bNumber; CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber); if (!bNumber) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str); if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return TRUE; } m_dwLastXRefOffset = m_dwXRefOffset; SetStartOffset(m_dwXRefOffset); m_docStatus = PDF_DATAAVAIL_CROSSREF; return TRUE; } m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return TRUE; } pHints->AddSegment(req_pos, dwSize); return FALSE; } int32_t CPDF_DataAvail::CheckCrossRefStream( IPDF_DataAvail::DownloadHints* pHints, FX_FILESIZE& xref_offset) { xref_offset = 0; FX_DWORD req_size = (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) { int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam); CFX_BinaryBuf buf(iSize); uint8_t* pBuf = buf.GetBuffer(); m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize); ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE)); m_parser.m_pSyntax->InitParser(file.get(), 0); bool bNumber; CFX_ByteString objnum = m_parser.m_pSyntax->GetNextWord(&bNumber); if (!bNumber) return -1; FX_DWORD objNum = FXSYS_atoui(objnum); CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(nullptr, 0, objNum); if (!pObj) { m_Pos += m_parser.m_pSyntax->SavePos(); return 0; } CPDF_Dictionary* pDict = pObj->GetDict(); CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr); if (pName) { if (pName->GetString() == "XRef") { m_Pos += m_parser.m_pSyntax->SavePos(); xref_offset = pObj->GetDict()->GetIntegerBy("Prev"); pObj->Release(); return 1; } } pObj->Release(); return -1; } pHints->AddSegment(m_Pos, req_size); return 0; } inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) { m_Pos = dwOffset; } FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString& token) { uint8_t ch; if (!GetNextChar(ch)) return FALSE; while (1) { while (PDFCharIsWhitespace(ch)) { if (!GetNextChar(ch)) return FALSE; } if (ch != '%') break; while (1) { if (!GetNextChar(ch)) return FALSE; if (PDFCharIsLineEnding(ch)) break; } } uint8_t buffer[256]; FX_DWORD index = 0; if (PDFCharIsDelimiter(ch)) { buffer[index++] = ch; if (ch == '/') { while (1) { if (!GetNextChar(ch)) return FALSE; if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; CFX_ByteString ret(buffer, index); token = ret; return TRUE; } if (index < sizeof(buffer)) buffer[index++] = ch; } } else if (ch == '<') { if (!GetNextChar(ch)) return FALSE; if (ch == '<') buffer[index++] = ch; else m_Pos--; } else if (ch == '>') { if (!GetNextChar(ch)) return FALSE; if (ch == '>') buffer[index++] = ch; else m_Pos--; } CFX_ByteString ret(buffer, index); token = ret; return TRUE; } while (1) { if (index < sizeof(buffer)) buffer[index++] = ch; if (!GetNextChar(ch)) return FALSE; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } token = CFX_ByteString(buffer, index); return TRUE; } FX_BOOL CPDF_DataAvail::GetNextChar(uint8_t& ch) { FX_FILESIZE pos = m_Pos; if (pos >= m_dwFileLen) return FALSE; if (m_bufferOffset >= pos || (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) { FX_FILESIZE read_pos = pos; FX_DWORD read_size = 512; if ((FX_FILESIZE)read_size > m_dwFileLen) read_size = (FX_DWORD)m_dwFileLen; if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen) read_pos = m_dwFileLen - read_size; if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size)) return FALSE; m_bufferOffset = read_pos; m_bufferSize = read_size; } ch = m_bufferData[pos - m_bufferOffset]; m_Pos++; return TRUE; } FX_BOOL CPDF_DataAvail::CheckCrossRefItem( IPDF_DataAvail::DownloadHints* pHints) { int32_t iSize = 0; CFX_ByteString token; while (1) { if (!GetNextToken(token)) { iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); pHints->AddSegment(m_Pos, iSize); return FALSE; } if (token == "trailer") { m_dwTrailerOffset = m_Pos; m_docStatus = PDF_DATAAVAIL_TRAILER; return TRUE; } } } FX_BOOL CPDF_DataAvail::CheckAllCrossRefStream( IPDF_DataAvail::DownloadHints* pHints) { FX_FILESIZE xref_offset = 0; int32_t nRet = CheckCrossRefStream(pHints, xref_offset); if (nRet == 1) { if (!xref_offset) { m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF; } else { m_dwCurrentXRefSteam = xref_offset; m_Pos = xref_offset; } return TRUE; } if (nRet == -1) m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } FX_BOOL CPDF_DataAvail::CheckCrossRef(IPDF_DataAvail::DownloadHints* pHints) { int32_t iSize = 0; CFX_ByteString token; if (!GetNextToken(token)) { iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); pHints->AddSegment(m_Pos, iSize); return FALSE; } if (token == "xref") { while (1) { if (!GetNextToken(token)) { iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); pHints->AddSegment(m_Pos, iSize); m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM; return FALSE; } if (token == "trailer") { m_dwTrailerOffset = m_Pos; m_docStatus = PDF_DATAAVAIL_TRAILER; return TRUE; } } } else { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return TRUE; } return FALSE; } FX_BOOL CPDF_DataAvail::CheckTrailerAppend( IPDF_DataAvail::DownloadHints* pHints) { if (m_Pos < m_dwFileLen) { FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos(); int32_t iSize = (int32_t)( dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512); if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) { pHints->AddSegment(dwAppendPos, iSize); return FALSE; } } if (m_dwPrevXRefOffset) { SetStartOffset(m_dwPrevXRefOffset); m_docStatus = PDF_DATAAVAIL_CROSSREF; } else { m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF; } return TRUE; } FX_BOOL CPDF_DataAvail::CheckTrailer(IPDF_DataAvail::DownloadHints* pHints) { int32_t iTrailerSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) { int32_t iSize = (int32_t)(m_Pos + iTrailerSize - m_dwTrailerOffset); CFX_BinaryBuf buf(iSize); uint8_t* pBuf = buf.GetBuffer(); if (!pBuf) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize)) return FALSE; ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE)); m_syntaxParser.InitParser(file.get(), 0); std::unique_ptr> pTrailer( m_syntaxParser.GetObject(nullptr, 0, 0, true)); if (!pTrailer) { m_Pos += m_syntaxParser.SavePos(); pHints->AddSegment(m_Pos, iTrailerSize); return FALSE; } if (!pTrailer->IsDictionary()) return FALSE; CPDF_Dictionary* pTrailerDict = pTrailer->GetDict(); CPDF_Object* pEncrypt = pTrailerDict->GetElement("Encrypt"); if (ToReference(pEncrypt)) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return TRUE; } FX_DWORD xrefpos = GetDirectInteger(pTrailerDict, "Prev"); if (xrefpos) { m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm"); if (m_dwPrevXRefOffset) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; } else { m_dwPrevXRefOffset = xrefpos; if (m_dwPrevXRefOffset >= m_dwFileLen) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; } else { SetStartOffset(m_dwPrevXRefOffset); m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND; } } return TRUE; } m_dwPrevXRefOffset = 0; m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND; return TRUE; } pHints->AddSegment(m_Pos, iTrailerSize); return FALSE; } FX_BOOL CPDF_DataAvail::CheckPage(int32_t iPage, IPDF_DataAvail::DownloadHints* pHints) { while (TRUE) { switch (m_docStatus) { case PDF_DATAAVAIL_PAGETREE: if (!LoadDocPages(pHints)) return FALSE; break; case PDF_DATAAVAIL_PAGE: if (!LoadDocPage(iPage, pHints)) return FALSE; break; case PDF_DATAAVAIL_ERROR: return LoadAllFile(pHints); default: m_bPagesTreeLoad = TRUE; m_bPagesLoad = TRUE; m_bCurPageDictLoadOK = TRUE; m_docStatus = PDF_DATAAVAIL_PAGE; return TRUE; } } } FX_BOOL CPDF_DataAvail::CheckArrayPageNode( FX_DWORD dwPageNo, CPDF_DataAvail::PageNode* pPageNode, IPDF_DataAvail::DownloadHints* pHints) { FX_BOOL bExist = FALSE; CPDF_Object* pPages = GetObject(dwPageNo, pHints, &bExist); if (!bExist) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } if (!pPages) { if (m_docStatus == PDF_DATAAVAIL_ERROR) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } return FALSE; } CPDF_Array* pArray = pPages->AsArray(); if (!pArray) { pPages->Release(); m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } pPageNode->m_type = PDF_PAGENODE_PAGES; for (FX_DWORD i = 0; i < pArray->GetCount(); ++i) { CPDF_Reference* pKid = ToReference(pArray->GetElement(i)); if (!pKid) continue; PageNode* pNode = new PageNode(); pPageNode->m_childNode.Add(pNode); pNode->m_dwPageNo = pKid->GetRefObjNum(); } pPages->Release(); return TRUE; } FX_BOOL CPDF_DataAvail::CheckUnkownPageNode( FX_DWORD dwPageNo, CPDF_DataAvail::PageNode* pPageNode, IPDF_DataAvail::DownloadHints* pHints) { FX_BOOL bExist = FALSE; CPDF_Object* pPage = GetObject(dwPageNo, pHints, &bExist); if (!bExist) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } if (!pPage) { if (m_docStatus == PDF_DATAAVAIL_ERROR) m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } if (pPage->IsArray()) { pPageNode->m_dwPageNo = dwPageNo; pPageNode->m_type = PDF_PAGENODE_ARRAY; pPage->Release(); return TRUE; } if (!pPage->IsDictionary()) { pPage->Release(); m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } pPageNode->m_dwPageNo = dwPageNo; CPDF_Dictionary* pDict = pPage->GetDict(); CFX_ByteString type = pDict->GetStringBy("Type"); if (type == "Pages") { pPageNode->m_type = PDF_PAGENODE_PAGES; CPDF_Object* pKids = pDict->GetElement("Kids"); if (!pKids) { m_docStatus = PDF_DATAAVAIL_PAGE; return TRUE; } switch (pKids->GetType()) { case CPDF_Object::REFERENCE: { CPDF_Reference* pKid = pKids->AsReference(); PageNode* pNode = new PageNode(); pPageNode->m_childNode.Add(pNode); pNode->m_dwPageNo = pKid->GetRefObjNum(); } break; case CPDF_Object::ARRAY: { CPDF_Array* pKidsArray = pKids->AsArray(); for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) { CPDF_Reference* pKid = ToReference(pKidsArray->GetElement(i)); if (!pKid) continue; PageNode* pNode = new PageNode(); pPageNode->m_childNode.Add(pNode); pNode->m_dwPageNo = pKid->GetRefObjNum(); } } break; default: break; } } else if (type == "Page") { pPageNode->m_type = PDF_PAGENODE_PAGE; } else { pPage->Release(); m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } pPage->Release(); return TRUE; } FX_BOOL CPDF_DataAvail::CheckPageNode(CPDF_DataAvail::PageNode& pageNodes, int32_t iPage, int32_t& iCount, IPDF_DataAvail::DownloadHints* pHints, int level) { if (level >= kMaxPageRecursionDepth) return FALSE; int32_t iSize = pageNodes.m_childNode.GetSize(); if (iSize <= 0 || iPage >= iSize) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } for (int32_t i = 0; i < iSize; ++i) { PageNode* pNode = pageNodes.m_childNode.GetAt(i); if (!pNode) continue; switch (pNode->m_type) { case PDF_PAGENODE_UNKNOWN: if (!CheckUnkownPageNode(pNode->m_dwPageNo, pNode, pHints)) { return FALSE; } --i; break; case PDF_PAGENODE_PAGE: iCount++; if (iPage == iCount && m_pDocument) m_pDocument->m_PageList.SetAt(iPage, pNode->m_dwPageNo); break; case PDF_PAGENODE_PAGES: if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1)) return FALSE; break; case PDF_PAGENODE_ARRAY: if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints)) return FALSE; --i; break; } if (iPage == iCount) { m_docStatus = PDF_DATAAVAIL_DONE; return TRUE; } } return TRUE; } FX_BOOL CPDF_DataAvail::LoadDocPage(int32_t iPage, IPDF_DataAvail::DownloadHints* pHints) { if (m_pDocument->GetPageCount() <= iPage || m_pDocument->m_PageList.GetAt(iPage)) { m_docStatus = PDF_DATAAVAIL_DONE; return TRUE; } if (m_pageNodes.m_type == PDF_PAGENODE_PAGE) { if (iPage == 0) { m_docStatus = PDF_DATAAVAIL_DONE; return TRUE; } m_docStatus = PDF_DATAAVAIL_ERROR; return TRUE; } int32_t iCount = -1; return CheckPageNode(m_pageNodes, iPage, iCount, pHints, 0); } FX_BOOL CPDF_DataAvail::CheckPageCount(IPDF_DataAvail::DownloadHints* pHints) { FX_BOOL bExist = FALSE; CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist); if (!bExist) { m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } if (!pPages) return FALSE; CPDF_Dictionary* pPagesDict = pPages->GetDict(); if (!pPagesDict) { pPages->Release(); m_docStatus = PDF_DATAAVAIL_ERROR; return FALSE; } if (!pPagesDict->KeyExist("Kids")) { pPages->Release(); return TRUE; } int count = pPagesDict->GetIntegerBy("Count"); if (count > 0) { pPages->Release(); return TRUE; } pPages->Release(); return FALSE; } FX_BOOL CPDF_DataAvail::LoadDocPages(IPDF_DataAvail::DownloadHints* pHints) { if (!CheckUnkownPageNode(m_PagesObjNum, &m_pageNodes, pHints)) return FALSE; if (CheckPageCount(pHints)) { m_docStatus = PDF_DATAAVAIL_PAGE; return TRUE; } m_bTotalLoadPageTree = TRUE; return FALSE; } FX_BOOL CPDF_DataAvail::LoadPages(IPDF_DataAvail::DownloadHints* pHints) { while (!m_bPagesTreeLoad) { if (!CheckPageStatus(pHints)) return FALSE; } if (m_bPagesLoad) return TRUE; m_pDocument->LoadPages(); return FALSE; } IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData( IPDF_DataAvail::DownloadHints* pHints) { if (m_bLinearedDataOK) return DataAvailable; if (!m_bMainXRefLoadTried) { FX_SAFE_DWORD data_size = m_dwFileLen; data_size -= m_dwLastXRefOffset; if (!data_size.IsValid()) return DataError; if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, data_size.ValueOrDie())) { pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie()); return DataNotAvailable; } CPDF_Parser::Error eRet = m_pDocument->GetParser()->LoadLinearizedMainXRefTable(); m_bMainXRefLoadTried = TRUE; if (eRet != CPDF_Parser::SUCCESS) return DataError; if (!PreparePageItem()) return DataNotAvailable; m_bMainXRefLoadedOK = TRUE; m_bLinearedDataOK = TRUE; } return m_bLinearedDataOK ? DataAvailable : DataNotAvailable; } FX_BOOL CPDF_DataAvail::CheckPageAnnots(int32_t iPage, IPDF_DataAvail::DownloadHints* pHints) { if (!m_objs_array.GetSize()) { m_objs_array.RemoveAll(); m_ObjectSet.clear(); CPDF_Dictionary* pPageDict = m_pDocument->GetPage(iPage); if (!pPageDict) return TRUE; CPDF_Object* pAnnots = pPageDict->GetElement("Annots"); if (!pAnnots) return TRUE; CFX_ArrayTemplate obj_array; obj_array.Add(pAnnots); FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array); if (bRet) m_objs_array.RemoveAll(); return bRet; } CFX_ArrayTemplate new_objs_array; FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); m_objs_array.RemoveAll(); if (!bRet) m_objs_array.Append(new_objs_array); return bRet; } IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage( int32_t iPage, IPDF_DataAvail::DownloadHints* pHints) { if (!m_bAnnotsLoad) { if (!CheckPageAnnots(iPage, pHints)) return DataNotAvailable; m_bAnnotsLoad = TRUE; } DocAvailStatus nRet = CheckLinearizedData(pHints); if (nRet == DataAvailable) m_bPageLoadedOK = FALSE; return nRet; } FX_BOOL CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) { CFX_AutoRestorer restorer(&s_CurrentDataAvailRecursionDepth); if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth) return FALSE; CPDF_Object* pParent = pDict->GetElement("Parent"); if (!pParent) return FALSE; CPDF_Dictionary* pParentDict = pParent->GetDict(); if (!pParentDict) return FALSE; CPDF_Object* pRet = pParentDict->GetElement("Resources"); if (pRet) { m_pPageResource = pRet; return TRUE; } return HaveResourceAncestor(pParentDict); } IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( int32_t iPage, IPDF_DataAvail::DownloadHints* pHints) { if (!m_pDocument) return DataError; if (IsFirstCheck(iPage)) { m_bCurPageDictLoadOK = FALSE; m_bPageLoadedOK = FALSE; m_bAnnotsLoad = FALSE; m_bNeedDownLoadResource = FALSE; m_objs_array.RemoveAll(); m_ObjectSet.clear(); } if (pdfium::ContainsKey(m_pagesLoadState, iPage)) return DataAvailable; if (m_bLinearized) { if ((FX_DWORD)iPage == m_dwFirstPageNo) { DocAvailStatus nRet = CheckLinearizedFirstPage(iPage, pHints); if (nRet == DataAvailable) m_pagesLoadState.insert(iPage); return nRet; } DocAvailStatus nResult = CheckLinearizedData(pHints); if (nResult != DataAvailable) return nResult; if (m_pHintTables) { nResult = m_pHintTables->CheckPage(iPage, pHints); if (nResult != DataAvailable) return nResult; m_pagesLoadState.insert(iPage); return DataAvailable; } if (m_bMainXRefLoadedOK) { if (m_bTotalLoadPageTree) { if (!LoadPages(pHints)) return DataNotAvailable; } else { if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) return DataNotAvailable; } } else { if (!LoadAllFile(pHints)) return DataNotAvailable; m_pDocument->GetParser()->RebuildCrossRef(); ResetFirstCheck(iPage); return DataAvailable; } } else { if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) { return DataNotAvailable; } } if (m_bHaveAcroForm && !m_bAcroFormLoad) { if (!CheckAcroFormSubObject(pHints)) return DataNotAvailable; m_bAcroFormLoad = TRUE; } if (!m_bPageLoadedOK) { if (!m_objs_array.GetSize()) { m_objs_array.RemoveAll(); m_ObjectSet.clear(); m_pPageDict = m_pDocument->GetPage(iPage); if (!m_pPageDict) { ResetFirstCheck(iPage); return DataAvailable; } CFX_ArrayTemplate obj_array; obj_array.Add(m_pPageDict); FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array); if (!bRet) return DataNotAvailable; m_objs_array.RemoveAll(); } else { CFX_ArrayTemplate new_objs_array; FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); m_objs_array.RemoveAll(); if (!bRet) { m_objs_array.Append(new_objs_array); return DataNotAvailable; } } m_bPageLoadedOK = TRUE; } if (!m_bAnnotsLoad) { if (!CheckPageAnnots(iPage, pHints)) return DataNotAvailable; m_bAnnotsLoad = TRUE; } if (m_pPageDict && !m_bNeedDownLoadResource) { m_pPageResource = m_pPageDict->GetElement("Resources"); if (!m_pPageResource) m_bNeedDownLoadResource = HaveResourceAncestor(m_pPageDict); else m_bNeedDownLoadResource = TRUE; } if (m_bNeedDownLoadResource) { FX_BOOL bRet = CheckResources(pHints); if (!bRet) return DataNotAvailable; m_bNeedDownLoadResource = FALSE; } m_bPageLoadedOK = FALSE; m_bAnnotsLoad = FALSE; m_bCurPageDictLoadOK = FALSE; ResetFirstCheck(iPage); m_pagesLoadState.insert(iPage); return DataAvailable; } FX_BOOL CPDF_DataAvail::CheckResources(IPDF_DataAvail::DownloadHints* pHints) { if (!m_objs_array.GetSize()) { m_objs_array.RemoveAll(); CFX_ArrayTemplate obj_array; obj_array.Add(m_pPageResource); FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array); if (bRet) m_objs_array.RemoveAll(); return bRet; } CFX_ArrayTemplate new_objs_array; FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); m_objs_array.RemoveAll(); if (!bRet) m_objs_array.Append(new_objs_array); return bRet; } void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, FX_DWORD* pSize) { if (pPos) *pPos = m_dwLastXRefOffset; if (pSize) *pSize = (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset); } int CPDF_DataAvail::GetPageCount() const { if (m_pLinearized) { CPDF_Dictionary* pDict = m_pLinearized->GetDict(); CPDF_Object* pObj = pDict ? pDict->GetElementValue("N") : nullptr; return pObj ? pObj->GetInteger() : 0; } return m_pDocument ? m_pDocument->GetPageCount() : 0; } CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { if (!m_pDocument || index < 0 || index >= GetPageCount()) return nullptr; if (m_pLinearized) { CPDF_Dictionary* pDict = m_pLinearized->GetDict(); CPDF_Object* pObj = pDict ? pDict->GetElementValue("P") : nullptr; int pageNum = pObj ? pObj->GetInteger() : 0; if (m_pHintTables && index != pageNum) { FX_FILESIZE szPageStartPos = 0; FX_FILESIZE szPageLength = 0; FX_DWORD dwObjNum = 0; FX_BOOL bPagePosGot = m_pHintTables->GetPagePos(index, szPageStartPos, szPageLength, dwObjNum); if (!bPagePosGot) return nullptr; m_syntaxParser.InitParser(m_pFileRead, (FX_DWORD)szPageStartPos); CPDF_Object* pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument); if (!pPageDict) return nullptr; if (!m_pDocument->InsertIndirectObject(dwObjNum, pPageDict)) return nullptr; return pPageDict->GetDict(); } } return m_pDocument->GetPage(index); } IPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( IPDF_DataAvail::DownloadHints* pHints) { if (!m_pDocument) return FormAvailable; if (!m_bLinearizedFormParamLoad) { CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); if (!pRoot) return FormAvailable; CPDF_Object* pAcroForm = pRoot->GetElement("AcroForm"); if (!pAcroForm) return FormNotExist; DocAvailStatus nDocStatus = CheckLinearizedData(pHints); if (nDocStatus == DataError) return FormError; if (nDocStatus == DataNotAvailable) return FormNotAvailable; if (!m_objs_array.GetSize()) m_objs_array.Add(pAcroForm->GetDict()); m_bLinearizedFormParamLoad = TRUE; } CFX_ArrayTemplate new_objs_array; FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); m_objs_array.RemoveAll(); if (!bRet) { m_objs_array.Append(new_objs_array); return FormNotAvailable; } return FormAvailable; } CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {} CPDF_DataAvail::PageNode::~PageNode() { for (int32_t i = 0; i < m_childNode.GetSize(); ++i) delete m_childNode[i]; m_childNode.RemoveAll(); }