From 764ec513eecbebd12781bcc96ce81ed5e736ee92 Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Mon, 14 Mar 2016 13:35:12 -0400 Subject: Move core/src/ up to core/. This CL moves the core/src/ files up to core/ and fixes up the include guards, includes and build files. R=tsepez@chromium.org Review URL: https://codereview.chromium.org/1800523005 . --- core/fpdfapi/fpdf_parser/cpdf_data_avail.cpp | 1841 ++++++++++++++++++++++++++ 1 file changed, 1841 insertions(+) create mode 100644 core/fpdfapi/fpdf_parser/cpdf_data_avail.cpp (limited to 'core/fpdfapi/fpdf_parser/cpdf_data_avail.cpp') diff --git a/core/fpdfapi/fpdf_parser/cpdf_data_avail.cpp b/core/fpdfapi/fpdf_parser/cpdf_data_avail.cpp new file mode 100644 index 0000000000..93916cba8b --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_data_avail.cpp @@ -0,0 +1,1841 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfapi/fpdf_parser/cpdf_data_avail.h" +#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_document.h" +#include "core/include/fpdfapi/cpdf_name.h" +#include "core/include/fpdfapi/cpdf_number.h" +#include "core/include/fpdfapi/cpdf_reference.h" +#include "core/include/fpdfapi/cpdf_stream.h" +#include "core/include/fxcrt/fx_ext.h" +#include "core/include/fxcrt/fx_safe_types.h" +#include "third_party/base/stl_util.h" + +IPDF_DataAvail::IPDF_DataAvail(IPDF_DataAvail::FileAvail* pFileAvail, + IFX_FileRead* pFileRead) + : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {} + +IPDF_DataAvail::~IPDF_DataAvail() {} + +IPDF_DataAvail::FileAvail::~FileAvail() {} + +IPDF_DataAvail::DownloadHints::~DownloadHints() {} + +// static +IPDF_DataAvail* IPDF_DataAvail::Create(IPDF_DataAvail::FileAvail* pFileAvail, + IFX_FileRead* pFileRead) { + return new CPDF_DataAvail(pFileAvail, pFileRead, TRUE); +} + +// static +int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0; + +CPDF_DataAvail::CPDF_DataAvail(IPDF_DataAvail::FileAvail* pFileAvail, + IFX_FileRead* pFileRead, + FX_BOOL bSupportHintTable) + : IPDF_DataAvail(pFileAvail, pFileRead) { + m_Pos = 0; + m_dwFileLen = 0; + if (m_pFileRead) { + m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize(); + } + m_dwCurrentOffset = 0; + m_dwXRefOffset = 0; + m_bufferOffset = 0; + m_dwFirstPageNo = 0; + m_bufferSize = 0; + m_PagesObjNum = 0; + m_dwCurrentXRefSteam = 0; + m_dwAcroFormObjNum = 0; + m_dwInfoObjNum = 0; + m_pDocument = 0; + m_dwEncryptObjNum = 0; + m_dwPrevXRefOffset = 0; + m_dwLastXRefOffset = 0; + m_bDocAvail = FALSE; + m_bMainXRefLoadTried = FALSE; + m_bDocAvail = FALSE; + m_bLinearized = FALSE; + m_bPagesLoad = FALSE; + m_bPagesTreeLoad = FALSE; + m_bMainXRefLoadedOK = FALSE; + m_bAnnotsLoad = FALSE; + m_bHaveAcroForm = FALSE; + m_bAcroFormLoad = FALSE; + m_bPageLoadedOK = FALSE; + m_bNeedDownLoadResource = FALSE; + m_bLinearizedFormParamLoad = FALSE; + m_pLinearized = NULL; + m_pRoot = NULL; + m_pTrailer = NULL; + m_pCurrentParser = NULL; + m_pAcroForm = NULL; + m_pPageDict = NULL; + m_pPageResource = NULL; + m_docStatus = PDF_DATAAVAIL_HEADER; + m_parser.m_bOwnFileRead = false; + m_bTotalLoadPageTree = FALSE; + m_bCurPageDictLoadOK = FALSE; + m_bLinearedDataOK = FALSE; + m_bSupportHintTable = bSupportHintTable; +} +CPDF_DataAvail::~CPDF_DataAvail() { + if (m_pLinearized) + m_pLinearized->Release(); + + if (m_pRoot) + m_pRoot->Release(); + + if (m_pTrailer) + m_pTrailer->Release(); + + int iSize = m_arrayAcroforms.GetSize(); + for (int i = 0; i < iSize; ++i) + m_arrayAcroforms.GetAt(i)->Release(); +} + +void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc) { + m_pDocument = pDoc; +} + +FX_DWORD CPDF_DataAvail::GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset) { + CPDF_Parser* pParser = m_pDocument->GetParser(); + if (!pParser || !pParser->IsValidObjectNumber(objnum)) + return 0; + + if (pParser->GetObjectType(objnum) == 2) + objnum = pParser->GetObjectPositionOrZero(objnum); + + if (pParser->GetObjectType(objnum) != 1 && + pParser->GetObjectType(objnum) != 255) { + return 0; + } + + offset = pParser->GetObjectPositionOrZero(objnum); + if (offset == 0) + return 0; + + auto it = pParser->m_SortedOffset.find(offset); + if (it == pParser->m_SortedOffset.end() || + ++it == pParser->m_SortedOffset.end()) { + return 0; + } + return *it - offset; +} + +FX_BOOL CPDF_DataAvail::IsObjectsAvail( + CFX_ArrayTemplate& obj_array, + FX_BOOL bParsePage, + IPDF_DataAvail::DownloadHints* pHints, + CFX_ArrayTemplate& ret_array) { + if (!obj_array.GetSize()) + return TRUE; + + FX_DWORD count = 0; + CFX_ArrayTemplate new_obj_array; + int32_t i = 0; + for (i = 0; i < obj_array.GetSize(); i++) { + CPDF_Object* pObj = obj_array[i]; + if (!pObj) + continue; + + int32_t type = pObj->GetType(); + switch (type) { + case CPDF_Object::ARRAY: { + CPDF_Array* pArray = pObj->GetArray(); + for (FX_DWORD k = 0; k < pArray->GetCount(); ++k) + new_obj_array.Add(pArray->GetElement(k)); + } break; + case CPDF_Object::STREAM: + pObj = pObj->GetDict(); + case CPDF_Object::DICTIONARY: { + CPDF_Dictionary* pDict = pObj->GetDict(); + if (pDict && pDict->GetStringBy("Type") == "Page" && !bParsePage) + continue; + + for (const auto& it : *pDict) { + const CFX_ByteString& key = it.first; + CPDF_Object* value = it.second; + if (key != "Parent") + new_obj_array.Add(value); + } + } break; + case CPDF_Object::REFERENCE: { + CPDF_Reference* pRef = pObj->AsReference(); + FX_DWORD dwNum = pRef->GetRefObjNum(); + + FX_FILESIZE offset; + FX_DWORD size = GetObjectSize(dwNum, offset); + if (size == 0 || offset < 0 || offset >= m_dwFileLen) + break; + + if (!IsDataAvail(offset, size, pHints)) { + ret_array.Add(pObj); + count++; + } else if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) { + m_ObjectSet.insert(dwNum); + CPDF_Object* pReferred = + m_pDocument->GetIndirectObject(pRef->GetRefObjNum()); + if (pReferred) + new_obj_array.Add(pReferred); + } + } break; + } + } + + if (count > 0) { + int32_t iSize = new_obj_array.GetSize(); + for (i = 0; i < iSize; ++i) { + CPDF_Object* pObj = new_obj_array[i]; + if (CPDF_Reference* pRef = pObj->AsReference()) { + FX_DWORD dwNum = pRef->GetRefObjNum(); + if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) + ret_array.Add(pObj); + } else { + ret_array.Add(pObj); + } + } + return FALSE; + } + + obj_array.RemoveAll(); + obj_array.Append(new_obj_array); + return IsObjectsAvail(obj_array, FALSE, pHints, ret_array); +} + +IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail( + IPDF_DataAvail::DownloadHints* pHints) { + if (!m_dwFileLen && m_pFileRead) { + m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize(); + if (!m_dwFileLen) + return DataError; + } + + while (!m_bDocAvail) { + if (!CheckDocStatus(pHints)) + return DataNotAvailable; + } + + return DataAvailable; +} + +FX_BOOL CPDF_DataAvail::CheckAcroFormSubObject( + IPDF_DataAvail::DownloadHints* pHints) { + if (!m_objs_array.GetSize()) { + m_objs_array.RemoveAll(); + m_ObjectSet.clear(); + CFX_ArrayTemplate obj_array; + obj_array.Append(m_arrayAcroforms); + FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array); + if (bRet) + m_objs_array.RemoveAll(); + return bRet; + } + + CFX_ArrayTemplate new_objs_array; + FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); + if (bRet) { + int32_t iSize = m_arrayAcroforms.GetSize(); + for (int32_t i = 0; i < iSize; ++i) { + m_arrayAcroforms.GetAt(i)->Release(); + } + m_arrayAcroforms.RemoveAll(); + } else { + m_objs_array.RemoveAll(); + m_objs_array.Append(new_objs_array); + } + return bRet; +} + +FX_BOOL CPDF_DataAvail::CheckAcroForm(IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist); + if (!bExist) { + m_docStatus = PDF_DATAAVAIL_PAGETREE; + return TRUE; + } + + if (!m_pAcroForm) { + if (m_docStatus == PDF_DATAAVAIL_ERROR) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + return FALSE; + } + + m_arrayAcroforms.Add(m_pAcroForm); + m_docStatus = PDF_DATAAVAIL_PAGETREE; + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckDocStatus(IPDF_DataAvail::DownloadHints* pHints) { + switch (m_docStatus) { + case PDF_DATAAVAIL_HEADER: + return CheckHeader(pHints); + case PDF_DATAAVAIL_FIRSTPAGE: + case PDF_DATAAVAIL_FIRSTPAGE_PREPARE: + return CheckFirstPage(pHints); + case PDF_DATAAVAIL_HINTTABLE: + return CheckHintTables(pHints); + case PDF_DATAAVAIL_END: + return CheckEnd(pHints); + case PDF_DATAAVAIL_CROSSREF: + return CheckCrossRef(pHints); + case PDF_DATAAVAIL_CROSSREF_ITEM: + return CheckCrossRefItem(pHints); + case PDF_DATAAVAIL_CROSSREF_STREAM: + return CheckAllCrossRefStream(pHints); + case PDF_DATAAVAIL_TRAILER: + return CheckTrailer(pHints); + case PDF_DATAAVAIL_TRAILER_APPEND: + return CheckTrailerAppend(pHints); + case PDF_DATAAVAIL_LOADALLCROSSREF: + return LoadAllXref(pHints); + case PDF_DATAAVAIL_LOADALLFILE: + return LoadAllFile(pHints); + case PDF_DATAAVAIL_ROOT: + return CheckRoot(pHints); + case PDF_DATAAVAIL_INFO: + return CheckInfo(pHints); + case PDF_DATAAVAIL_ACROFORM: + return CheckAcroForm(pHints); + case PDF_DATAAVAIL_PAGETREE: + if (m_bTotalLoadPageTree) + return CheckPages(pHints); + return LoadDocPages(pHints); + case PDF_DATAAVAIL_PAGE: + if (m_bTotalLoadPageTree) + return CheckPage(pHints); + m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD; + return TRUE; + case PDF_DATAAVAIL_ERROR: + return LoadAllFile(pHints); + case PDF_DATAAVAIL_PAGE_LATERLOAD: + m_docStatus = PDF_DATAAVAIL_PAGE; + default: + m_bDocAvail = TRUE; + return TRUE; + } +} + +FX_BOOL CPDF_DataAvail::CheckPageStatus(IPDF_DataAvail::DownloadHints* pHints) { + switch (m_docStatus) { + case PDF_DATAAVAIL_PAGETREE: + return CheckPages(pHints); + case PDF_DATAAVAIL_PAGE: + return CheckPage(pHints); + case PDF_DATAAVAIL_ERROR: + return LoadAllFile(pHints); + default: + m_bPagesTreeLoad = TRUE; + m_bPagesLoad = TRUE; + return TRUE; + } +} + +FX_BOOL CPDF_DataAvail::LoadAllFile(IPDF_DataAvail::DownloadHints* pHints) { + if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) { + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; + } + + pHints->AddSegment(0, (FX_DWORD)m_dwFileLen); + return FALSE; +} + +FX_BOOL CPDF_DataAvail::LoadAllXref(IPDF_DataAvail::DownloadHints* pHints) { + m_parser.m_pSyntax->InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset); + m_parser.m_bOwnFileRead = false; + if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && + !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return FALSE; + } + + m_dwRootObjNum = m_parser.GetRootObjNum(); + m_dwInfoObjNum = m_parser.GetInfoObjNum(); + m_pCurrentParser = &m_parser; + m_docStatus = PDF_DATAAVAIL_ROOT; + return TRUE; +} + +CPDF_Object* CPDF_DataAvail::GetObject(FX_DWORD objnum, + IPDF_DataAvail::DownloadHints* pHints, + FX_BOOL* pExistInFile) { + CPDF_Object* pRet = nullptr; + FX_DWORD size = 0; + FX_FILESIZE offset = 0; + CPDF_Parser* pParser = nullptr; + + if (pExistInFile) + *pExistInFile = TRUE; + + if (m_pDocument) { + size = GetObjectSize(objnum, offset); + pParser = m_pDocument->GetParser(); + } else { + size = (FX_DWORD)m_parser.GetObjectSize(objnum); + offset = m_parser.GetObjectOffset(objnum); + pParser = &m_parser; + } + + if (!IsDataAvail(offset, size, pHints)) + return nullptr; + + if (pParser) + pRet = pParser->ParseIndirectObject(nullptr, objnum); + + if (!pRet && pExistInFile) + *pExistInFile = FALSE; + + return pRet; +} + +FX_BOOL CPDF_DataAvail::CheckInfo(IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + CPDF_Object* pInfo = GetObject(m_dwInfoObjNum, pHints, &bExist); + if (!bExist) { + m_docStatus = + (m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE); + return TRUE; + } + + if (!pInfo) { + if (m_docStatus == PDF_DATAAVAIL_ERROR) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + + if (m_Pos == m_dwFileLen) + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (pInfo) + pInfo->Release(); + + m_docStatus = + (m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE); + + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckRoot(IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist); + if (!bExist) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + + if (!m_pRoot) { + if (m_docStatus == PDF_DATAAVAIL_ERROR) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + return FALSE; + } + + CPDF_Dictionary* pDict = m_pRoot->GetDict(); + if (!pDict) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + CPDF_Reference* pRef = ToReference(pDict->GetElement("Pages")); + if (!pRef) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + m_PagesObjNum = pRef->GetRefObjNum(); + CPDF_Reference* pAcroFormRef = + ToReference(m_pRoot->GetDict()->GetElement("AcroForm")); + if (pAcroFormRef) { + m_bHaveAcroForm = TRUE; + m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum(); + } + + if (m_dwInfoObjNum) { + m_docStatus = PDF_DATAAVAIL_INFO; + } else { + m_docStatus = + m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE; + } + return TRUE; +} + +FX_BOOL CPDF_DataAvail::PreparePageItem() { + CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); + CPDF_Reference* pRef = + ToReference(pRoot ? pRoot->GetElement("Pages") : nullptr); + if (!pRef) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + m_PagesObjNum = pRef->GetRefObjNum(); + m_pCurrentParser = m_pDocument->GetParser(); + m_docStatus = PDF_DATAAVAIL_PAGETREE; + return TRUE; +} + +bool CPDF_DataAvail::IsFirstCheck(int iPage) { + return m_pageMapCheckState.insert(iPage).second; +} + +void CPDF_DataAvail::ResetFirstCheck(int iPage) { + m_pageMapCheckState.erase(iPage); +} + +FX_BOOL CPDF_DataAvail::CheckPage(IPDF_DataAvail::DownloadHints* pHints) { + FX_DWORD iPageObjs = m_PageObjList.GetSize(); + CFX_DWordArray UnavailObjList; + for (FX_DWORD i = 0; i < iPageObjs; ++i) { + FX_DWORD dwPageObjNum = m_PageObjList.GetAt(i); + FX_BOOL bExist = FALSE; + CPDF_Object* pObj = GetObject(dwPageObjNum, pHints, &bExist); + if (!pObj) { + if (bExist) + UnavailObjList.Add(dwPageObjNum); + continue; + } + + if (pObj->IsArray()) { + CPDF_Array* pArray = pObj->GetArray(); + if (pArray) { + int32_t iSize = pArray->GetCount(); + for (int32_t j = 0; j < iSize; ++j) { + if (CPDF_Reference* pRef = ToReference(pArray->GetElement(j))) + UnavailObjList.Add(pRef->GetRefObjNum()); + } + } + } + + if (!pObj->IsDictionary()) { + pObj->Release(); + continue; + } + + CFX_ByteString type = pObj->GetDict()->GetStringBy("Type"); + if (type == "Pages") { + m_PagesArray.Add(pObj); + continue; + } + pObj->Release(); + } + + m_PageObjList.RemoveAll(); + if (UnavailObjList.GetSize()) { + m_PageObjList.Append(UnavailObjList); + return FALSE; + } + + FX_DWORD iPages = m_PagesArray.GetSize(); + for (FX_DWORD i = 0; i < iPages; i++) { + CPDF_Object* pPages = m_PagesArray.GetAt(i); + if (!pPages) + continue; + + if (!GetPageKids(m_pCurrentParser, pPages)) { + pPages->Release(); + while (++i < iPages) { + pPages = m_PagesArray.GetAt(i); + pPages->Release(); + } + m_PagesArray.RemoveAll(); + + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + pPages->Release(); + } + + m_PagesArray.RemoveAll(); + if (!m_PageObjList.GetSize()) + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; +} + +FX_BOOL CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) { + if (!pParser) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + CPDF_Dictionary* pDict = pPages->GetDict(); + CPDF_Object* pKids = pDict ? pDict->GetElement("Kids") : NULL; + if (!pKids) + return TRUE; + + switch (pKids->GetType()) { + case CPDF_Object::REFERENCE: + m_PageObjList.Add(pKids->AsReference()->GetRefObjNum()); + break; + case CPDF_Object::ARRAY: { + CPDF_Array* pKidsArray = pKids->AsArray(); + for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) { + if (CPDF_Reference* pRef = ToReference(pKidsArray->GetElement(i))) + m_PageObjList.Add(pRef->GetRefObjNum()); + } + } break; + default: + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckPages(IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist); + if (!bExist) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + + if (!pPages) { + if (m_docStatus == PDF_DATAAVAIL_ERROR) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + return FALSE; + } + + if (!GetPageKids(m_pCurrentParser, pPages)) { + pPages->Release(); + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + pPages->Release(); + m_docStatus = PDF_DATAAVAIL_PAGE; + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckHeader(IPDF_DataAvail::DownloadHints* pHints) { + FX_DWORD req_size = 1024; + if ((FX_FILESIZE)req_size > m_dwFileLen) + req_size = (FX_DWORD)m_dwFileLen; + + if (m_pFileAvail->IsDataAvail(0, req_size)) { + uint8_t buffer[1024]; + m_pFileRead->ReadBlock(buffer, 0, req_size); + + if (IsLinearizedFile(buffer, req_size)) { + m_docStatus = PDF_DATAAVAIL_FIRSTPAGE; + } else { + if (m_docStatus == PDF_DATAAVAIL_ERROR) + return FALSE; + m_docStatus = PDF_DATAAVAIL_END; + } + return TRUE; + } + + pHints->AddSegment(0, req_size); + return FALSE; +} + +FX_BOOL CPDF_DataAvail::CheckFirstPage(IPDF_DataAvail::DownloadHints* pHints) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + CPDF_Object* pEndOffSet = pDict ? pDict->GetElement("E") : NULL; + if (!pEndOffSet) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + CPDF_Object* pXRefOffset = pDict ? pDict->GetElement("T") : NULL; + if (!pXRefOffset) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + CPDF_Object* pFileLen = pDict ? pDict->GetElement("L") : NULL; + if (!pFileLen) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + FX_BOOL bNeedDownLoad = FALSE; + if (pEndOffSet->IsNumber()) { + FX_DWORD dwEnd = pEndOffSet->GetInteger(); + dwEnd += 512; + if ((FX_FILESIZE)dwEnd > m_dwFileLen) + dwEnd = (FX_DWORD)m_dwFileLen; + + int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); + int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; + if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { + pHints->AddSegment(iStartPos, iSize); + bNeedDownLoad = TRUE; + } + } + + m_dwLastXRefOffset = 0; + FX_FILESIZE dwFileLen = 0; + if (pXRefOffset->IsNumber()) + m_dwLastXRefOffset = pXRefOffset->GetInteger(); + + if (pFileLen->IsNumber()) + dwFileLen = pFileLen->GetInteger(); + + if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, + (FX_DWORD)(dwFileLen - m_dwLastXRefOffset))) { + if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) { + FX_DWORD dwSize = (FX_DWORD)(dwFileLen - m_dwLastXRefOffset); + FX_FILESIZE offset = m_dwLastXRefOffset; + if (dwSize < 512 && dwFileLen > 512) { + dwSize = 512; + offset = dwFileLen - 512; + } + pHints->AddSegment(offset, dwSize); + } + } else { + m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; + } + + if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) { + m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; + return FALSE; + } + + m_docStatus = + m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE; + return TRUE; +} + +FX_BOOL CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, + FX_DWORD size, + IPDF_DataAvail::DownloadHints* pHints) { + if (offset > m_dwFileLen) + return TRUE; + + FX_SAFE_DWORD safeSize = pdfium::base::checked_cast(offset); + safeSize += size; + safeSize += 512; + if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen) + size = m_dwFileLen - offset; + else + size += 512; + + if (!m_pFileAvail->IsDataAvail(offset, size)) { + pHints->AddSegment(offset, size); + return FALSE; + } + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckHintTables(IPDF_DataAvail::DownloadHints* pHints) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + if (!pDict) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (!pDict->KeyExist("H") || !pDict->KeyExist("O") || !pDict->KeyExist("N")) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + int nPageCount = pDict->GetElementValue("N")->GetInteger(); + if (nPageCount <= 1) { + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; + } + + CPDF_Array* pHintStreamRange = pDict->GetArrayBy("H"); + FX_FILESIZE szHSStart = + pHintStreamRange->GetElementValue(0) + ? pHintStreamRange->GetElementValue(0)->GetInteger() + : 0; + FX_FILESIZE szHSLength = + pHintStreamRange->GetElementValue(1) + ? pHintStreamRange->GetElementValue(1)->GetInteger() + : 0; + if (szHSStart < 0 || szHSLength <= 0) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (!IsDataAvail(szHSStart, szHSLength, pHints)) + return FALSE; + + m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset); + + std::unique_ptr pHintTables( + new CPDF_HintTables(this, pDict)); + std::unique_ptr> pHintStream( + ParseIndirectObjectAt(szHSStart, 0)); + CPDF_Stream* pStream = ToStream(pHintStream.get()); + if (pStream && pHintTables->LoadHintStream(pStream)) + m_pHintTables = std::move(pHintTables); + + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; +} + +CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt( + FX_FILESIZE pos, + FX_DWORD objnum, + CPDF_IndirectObjectHolder* pObjList) { + FX_FILESIZE SavedPos = m_syntaxParser.SavePos(); + m_syntaxParser.RestorePos(pos); + + bool bIsNumber; + CFX_ByteString word = m_syntaxParser.GetNextWord(&bIsNumber); + if (!bIsNumber) + return nullptr; + + FX_DWORD parser_objnum = FXSYS_atoui(word); + if (objnum && parser_objnum != objnum) + return nullptr; + + word = m_syntaxParser.GetNextWord(&bIsNumber); + if (!bIsNumber) + return nullptr; + + FX_DWORD gennum = FXSYS_atoui(word); + if (m_syntaxParser.GetKeyword() != "obj") { + m_syntaxParser.RestorePos(SavedPos); + return nullptr; + } + + CPDF_Object* pObj = + m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, true); + m_syntaxParser.RestorePos(SavedPos); + return pObj; +} + +IPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() { + FX_DWORD req_size = 1024; + if (!m_pFileAvail->IsDataAvail(0, req_size)) + return LinearizationUnknown; + + if (!m_pFileRead) + return NotLinearized; + + FX_FILESIZE dwSize = m_pFileRead->GetSize(); + if (dwSize < (FX_FILESIZE)req_size) + return LinearizationUnknown; + + uint8_t buffer[1024]; + m_pFileRead->ReadBlock(buffer, 0, req_size); + if (IsLinearizedFile(buffer, req_size)) + return Linearized; + + return NotLinearized; +} +FX_BOOL CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen) { + ScopedFileStream file(FX_CreateMemoryStream(pData, (size_t)dwLen, FALSE)); + + int32_t offset = GetHeaderOffset(file.get()); + if (offset == -1) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + m_dwHeaderOffset = offset; + m_syntaxParser.InitParser(file.get(), offset); + m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9); + + bool bNumber; + CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber); + if (!bNumber) + return FALSE; + + FX_DWORD objnum = FXSYS_atoui(wordObjNum); + if (m_pLinearized) { + m_pLinearized->Release(); + m_pLinearized = nullptr; + } + + m_pLinearized = + ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum); + if (!m_pLinearized) + return FALSE; + + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + if (pDict && pDict->GetElement("Linearized")) { + CPDF_Object* pLen = pDict->GetElement("L"); + if (!pLen) + return FALSE; + + if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize()) + return FALSE; + + m_bLinearized = TRUE; + + if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P"))) + m_dwFirstPageNo = pNo->GetInteger(); + + return TRUE; + } + return FALSE; +} + +FX_BOOL CPDF_DataAvail::CheckEnd(IPDF_DataAvail::DownloadHints* pHints) { + FX_DWORD req_pos = (FX_DWORD)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0); + FX_DWORD dwSize = (FX_DWORD)(m_dwFileLen - req_pos); + + if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) { + uint8_t buffer[1024]; + m_pFileRead->ReadBlock(buffer, req_pos, dwSize); + + ScopedFileStream file(FX_CreateMemoryStream(buffer, (size_t)dwSize, FALSE)); + m_syntaxParser.InitParser(file.get(), 0); + m_syntaxParser.RestorePos(dwSize - 1); + + if (m_syntaxParser.SearchWord("startxref", TRUE, FALSE, dwSize)) { + m_syntaxParser.GetNextWord(nullptr); + + bool bNumber; + CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber); + if (!bNumber) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str); + if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + + m_dwLastXRefOffset = m_dwXRefOffset; + SetStartOffset(m_dwXRefOffset); + m_docStatus = PDF_DATAAVAIL_CROSSREF; + return TRUE; + } + + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + + pHints->AddSegment(req_pos, dwSize); + return FALSE; +} + +int32_t CPDF_DataAvail::CheckCrossRefStream( + IPDF_DataAvail::DownloadHints* pHints, + FX_FILESIZE& xref_offset) { + xref_offset = 0; + FX_DWORD req_size = + (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); + + if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) { + int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam); + CFX_BinaryBuf buf(iSize); + uint8_t* pBuf = buf.GetBuffer(); + + m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize); + + ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE)); + m_parser.m_pSyntax->InitParser(file.get(), 0); + + bool bNumber; + CFX_ByteString objnum = m_parser.m_pSyntax->GetNextWord(&bNumber); + if (!bNumber) + return -1; + + FX_DWORD objNum = FXSYS_atoui(objnum); + CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(nullptr, 0, objNum); + if (!pObj) { + m_Pos += m_parser.m_pSyntax->SavePos(); + return 0; + } + + CPDF_Dictionary* pDict = pObj->GetDict(); + CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr); + if (pName) { + if (pName->GetString() == "XRef") { + m_Pos += m_parser.m_pSyntax->SavePos(); + xref_offset = pObj->GetDict()->GetIntegerBy("Prev"); + pObj->Release(); + return 1; + } + } + pObj->Release(); + return -1; + } + pHints->AddSegment(m_Pos, req_size); + return 0; +} + +inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) { + m_Pos = dwOffset; +} + +FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString& token) { + uint8_t ch; + if (!GetNextChar(ch)) + return FALSE; + + while (1) { + while (PDFCharIsWhitespace(ch)) { + if (!GetNextChar(ch)) + return FALSE; + } + + if (ch != '%') + break; + + while (1) { + if (!GetNextChar(ch)) + return FALSE; + if (PDFCharIsLineEnding(ch)) + break; + } + } + + uint8_t buffer[256]; + FX_DWORD index = 0; + if (PDFCharIsDelimiter(ch)) { + buffer[index++] = ch; + if (ch == '/') { + while (1) { + if (!GetNextChar(ch)) + return FALSE; + + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { + m_Pos--; + CFX_ByteString ret(buffer, index); + token = ret; + return TRUE; + } + + if (index < sizeof(buffer)) + buffer[index++] = ch; + } + } else if (ch == '<') { + if (!GetNextChar(ch)) + return FALSE; + + if (ch == '<') + buffer[index++] = ch; + else + m_Pos--; + } else if (ch == '>') { + if (!GetNextChar(ch)) + return FALSE; + + if (ch == '>') + buffer[index++] = ch; + else + m_Pos--; + } + + CFX_ByteString ret(buffer, index); + token = ret; + return TRUE; + } + + while (1) { + if (index < sizeof(buffer)) + buffer[index++] = ch; + + if (!GetNextChar(ch)) + return FALSE; + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { + m_Pos--; + break; + } + } + + token = CFX_ByteString(buffer, index); + return TRUE; +} + +FX_BOOL CPDF_DataAvail::GetNextChar(uint8_t& ch) { + FX_FILESIZE pos = m_Pos; + if (pos >= m_dwFileLen) + return FALSE; + + if (m_bufferOffset >= pos || + (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) { + FX_FILESIZE read_pos = pos; + FX_DWORD read_size = 512; + if ((FX_FILESIZE)read_size > m_dwFileLen) + read_size = (FX_DWORD)m_dwFileLen; + + if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen) + read_pos = m_dwFileLen - read_size; + + if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size)) + return FALSE; + + m_bufferOffset = read_pos; + m_bufferSize = read_size; + } + ch = m_bufferData[pos - m_bufferOffset]; + m_Pos++; + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckCrossRefItem( + IPDF_DataAvail::DownloadHints* pHints) { + int32_t iSize = 0; + CFX_ByteString token; + while (1) { + if (!GetNextToken(token)) { + iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); + pHints->AddSegment(m_Pos, iSize); + return FALSE; + } + + if (token == "trailer") { + m_dwTrailerOffset = m_Pos; + m_docStatus = PDF_DATAAVAIL_TRAILER; + return TRUE; + } + } +} + +FX_BOOL CPDF_DataAvail::CheckAllCrossRefStream( + IPDF_DataAvail::DownloadHints* pHints) { + FX_FILESIZE xref_offset = 0; + + int32_t nRet = CheckCrossRefStream(pHints, xref_offset); + if (nRet == 1) { + if (!xref_offset) { + m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF; + } else { + m_dwCurrentXRefSteam = xref_offset; + m_Pos = xref_offset; + } + return TRUE; + } + + if (nRet == -1) + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; +} + +FX_BOOL CPDF_DataAvail::CheckCrossRef(IPDF_DataAvail::DownloadHints* pHints) { + int32_t iSize = 0; + CFX_ByteString token; + if (!GetNextToken(token)) { + iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); + pHints->AddSegment(m_Pos, iSize); + return FALSE; + } + + if (token == "xref") { + while (1) { + if (!GetNextToken(token)) { + iSize = + (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); + pHints->AddSegment(m_Pos, iSize); + m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM; + return FALSE; + } + + if (token == "trailer") { + m_dwTrailerOffset = m_Pos; + m_docStatus = PDF_DATAAVAIL_TRAILER; + return TRUE; + } + } + } else { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + return FALSE; +} + +FX_BOOL CPDF_DataAvail::CheckTrailerAppend( + IPDF_DataAvail::DownloadHints* pHints) { + if (m_Pos < m_dwFileLen) { + FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos(); + int32_t iSize = (int32_t)( + dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512); + + if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) { + pHints->AddSegment(dwAppendPos, iSize); + return FALSE; + } + } + + if (m_dwPrevXRefOffset) { + SetStartOffset(m_dwPrevXRefOffset); + m_docStatus = PDF_DATAAVAIL_CROSSREF; + } else { + m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF; + } + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckTrailer(IPDF_DataAvail::DownloadHints* pHints) { + int32_t iTrailerSize = + (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); + if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) { + int32_t iSize = (int32_t)(m_Pos + iTrailerSize - m_dwTrailerOffset); + CFX_BinaryBuf buf(iSize); + uint8_t* pBuf = buf.GetBuffer(); + if (!pBuf) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize)) + return FALSE; + + ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE)); + m_syntaxParser.InitParser(file.get(), 0); + + std::unique_ptr> pTrailer( + m_syntaxParser.GetObject(nullptr, 0, 0, true)); + if (!pTrailer) { + m_Pos += m_syntaxParser.SavePos(); + pHints->AddSegment(m_Pos, iTrailerSize); + return FALSE; + } + + if (!pTrailer->IsDictionary()) + return FALSE; + + CPDF_Dictionary* pTrailerDict = pTrailer->GetDict(); + CPDF_Object* pEncrypt = pTrailerDict->GetElement("Encrypt"); + if (ToReference(pEncrypt)) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + + FX_DWORD xrefpos = GetDirectInteger(pTrailerDict, "Prev"); + if (xrefpos) { + m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm"); + if (m_dwPrevXRefOffset) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + } else { + m_dwPrevXRefOffset = xrefpos; + if (m_dwPrevXRefOffset >= m_dwFileLen) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + } else { + SetStartOffset(m_dwPrevXRefOffset); + m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND; + } + } + return TRUE; + } + m_dwPrevXRefOffset = 0; + m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND; + return TRUE; + } + pHints->AddSegment(m_Pos, iTrailerSize); + return FALSE; +} + +FX_BOOL CPDF_DataAvail::CheckPage(int32_t iPage, + IPDF_DataAvail::DownloadHints* pHints) { + while (TRUE) { + switch (m_docStatus) { + case PDF_DATAAVAIL_PAGETREE: + if (!LoadDocPages(pHints)) + return FALSE; + break; + case PDF_DATAAVAIL_PAGE: + if (!LoadDocPage(iPage, pHints)) + return FALSE; + break; + case PDF_DATAAVAIL_ERROR: + return LoadAllFile(pHints); + default: + m_bPagesTreeLoad = TRUE; + m_bPagesLoad = TRUE; + m_bCurPageDictLoadOK = TRUE; + m_docStatus = PDF_DATAAVAIL_PAGE; + return TRUE; + } + } +} + +FX_BOOL CPDF_DataAvail::CheckArrayPageNode( + FX_DWORD dwPageNo, + CPDF_DataAvail::PageNode* pPageNode, + IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + CPDF_Object* pPages = GetObject(dwPageNo, pHints, &bExist); + if (!bExist) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (!pPages) { + if (m_docStatus == PDF_DATAAVAIL_ERROR) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + return FALSE; + } + + CPDF_Array* pArray = pPages->AsArray(); + if (!pArray) { + pPages->Release(); + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + pPageNode->m_type = PDF_PAGENODE_PAGES; + for (FX_DWORD i = 0; i < pArray->GetCount(); ++i) { + CPDF_Reference* pKid = ToReference(pArray->GetElement(i)); + if (!pKid) + continue; + + PageNode* pNode = new PageNode(); + pPageNode->m_childNode.Add(pNode); + pNode->m_dwPageNo = pKid->GetRefObjNum(); + } + pPages->Release(); + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckUnkownPageNode( + FX_DWORD dwPageNo, + CPDF_DataAvail::PageNode* pPageNode, + IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + CPDF_Object* pPage = GetObject(dwPageNo, pHints, &bExist); + if (!bExist) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (!pPage) { + if (m_docStatus == PDF_DATAAVAIL_ERROR) + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (pPage->IsArray()) { + pPageNode->m_dwPageNo = dwPageNo; + pPageNode->m_type = PDF_PAGENODE_ARRAY; + pPage->Release(); + return TRUE; + } + + if (!pPage->IsDictionary()) { + pPage->Release(); + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + pPageNode->m_dwPageNo = dwPageNo; + CPDF_Dictionary* pDict = pPage->GetDict(); + CFX_ByteString type = pDict->GetStringBy("Type"); + if (type == "Pages") { + pPageNode->m_type = PDF_PAGENODE_PAGES; + CPDF_Object* pKids = pDict->GetElement("Kids"); + if (!pKids) { + m_docStatus = PDF_DATAAVAIL_PAGE; + return TRUE; + } + + switch (pKids->GetType()) { + case CPDF_Object::REFERENCE: { + CPDF_Reference* pKid = pKids->AsReference(); + PageNode* pNode = new PageNode(); + pPageNode->m_childNode.Add(pNode); + pNode->m_dwPageNo = pKid->GetRefObjNum(); + } break; + case CPDF_Object::ARRAY: { + CPDF_Array* pKidsArray = pKids->AsArray(); + for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) { + CPDF_Reference* pKid = ToReference(pKidsArray->GetElement(i)); + if (!pKid) + continue; + + PageNode* pNode = new PageNode(); + pPageNode->m_childNode.Add(pNode); + pNode->m_dwPageNo = pKid->GetRefObjNum(); + } + } break; + default: + break; + } + } else if (type == "Page") { + pPageNode->m_type = PDF_PAGENODE_PAGE; + } else { + pPage->Release(); + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + pPage->Release(); + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckPageNode(CPDF_DataAvail::PageNode& pageNodes, + int32_t iPage, + int32_t& iCount, + IPDF_DataAvail::DownloadHints* pHints, + int level) { + if (level >= kMaxPageRecursionDepth) + return FALSE; + + int32_t iSize = pageNodes.m_childNode.GetSize(); + if (iSize <= 0 || iPage >= iSize) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + for (int32_t i = 0; i < iSize; ++i) { + PageNode* pNode = pageNodes.m_childNode.GetAt(i); + if (!pNode) + continue; + + switch (pNode->m_type) { + case PDF_PAGENODE_UNKNOWN: + if (!CheckUnkownPageNode(pNode->m_dwPageNo, pNode, pHints)) { + return FALSE; + } + --i; + break; + case PDF_PAGENODE_PAGE: + iCount++; + if (iPage == iCount && m_pDocument) + m_pDocument->m_PageList.SetAt(iPage, pNode->m_dwPageNo); + break; + case PDF_PAGENODE_PAGES: + if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1)) + return FALSE; + break; + case PDF_PAGENODE_ARRAY: + if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints)) + return FALSE; + --i; + break; + } + + if (iPage == iCount) { + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; + } + } + return TRUE; +} + +FX_BOOL CPDF_DataAvail::LoadDocPage(int32_t iPage, + IPDF_DataAvail::DownloadHints* pHints) { + if (m_pDocument->GetPageCount() <= iPage || + m_pDocument->m_PageList.GetAt(iPage)) { + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; + } + + if (m_pageNodes.m_type == PDF_PAGENODE_PAGE) { + if (iPage == 0) { + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; + } + m_docStatus = PDF_DATAAVAIL_ERROR; + return TRUE; + } + int32_t iCount = -1; + return CheckPageNode(m_pageNodes, iPage, iCount, pHints, 0); +} + +FX_BOOL CPDF_DataAvail::CheckPageCount(IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist); + if (!bExist) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (!pPages) + return FALSE; + + CPDF_Dictionary* pPagesDict = pPages->GetDict(); + if (!pPagesDict) { + pPages->Release(); + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (!pPagesDict->KeyExist("Kids")) { + pPages->Release(); + return TRUE; + } + + int count = pPagesDict->GetIntegerBy("Count"); + if (count > 0) { + pPages->Release(); + return TRUE; + } + + pPages->Release(); + return FALSE; +} + +FX_BOOL CPDF_DataAvail::LoadDocPages(IPDF_DataAvail::DownloadHints* pHints) { + if (!CheckUnkownPageNode(m_PagesObjNum, &m_pageNodes, pHints)) + return FALSE; + + if (CheckPageCount(pHints)) { + m_docStatus = PDF_DATAAVAIL_PAGE; + return TRUE; + } + + m_bTotalLoadPageTree = TRUE; + return FALSE; +} + +FX_BOOL CPDF_DataAvail::LoadPages(IPDF_DataAvail::DownloadHints* pHints) { + while (!m_bPagesTreeLoad) { + if (!CheckPageStatus(pHints)) + return FALSE; + } + + if (m_bPagesLoad) + return TRUE; + + m_pDocument->LoadPages(); + return FALSE; +} + +IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData( + IPDF_DataAvail::DownloadHints* pHints) { + if (m_bLinearedDataOK) + return DataAvailable; + + if (!m_bMainXRefLoadTried) { + FX_SAFE_DWORD data_size = m_dwFileLen; + data_size -= m_dwLastXRefOffset; + if (!data_size.IsValid()) + return DataError; + + if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, + data_size.ValueOrDie())) { + pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie()); + return DataNotAvailable; + } + + CPDF_Parser::Error eRet = + m_pDocument->GetParser()->LoadLinearizedMainXRefTable(); + m_bMainXRefLoadTried = TRUE; + if (eRet != CPDF_Parser::SUCCESS) + return DataError; + + if (!PreparePageItem()) + return DataNotAvailable; + + m_bMainXRefLoadedOK = TRUE; + m_bLinearedDataOK = TRUE; + } + + return m_bLinearedDataOK ? DataAvailable : DataNotAvailable; +} + +FX_BOOL CPDF_DataAvail::CheckPageAnnots(int32_t iPage, + IPDF_DataAvail::DownloadHints* pHints) { + if (!m_objs_array.GetSize()) { + m_objs_array.RemoveAll(); + m_ObjectSet.clear(); + + CPDF_Dictionary* pPageDict = m_pDocument->GetPage(iPage); + if (!pPageDict) + return TRUE; + + CPDF_Object* pAnnots = pPageDict->GetElement("Annots"); + if (!pAnnots) + return TRUE; + + CFX_ArrayTemplate obj_array; + obj_array.Add(pAnnots); + + FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array); + if (bRet) + m_objs_array.RemoveAll(); + + return bRet; + } + + CFX_ArrayTemplate new_objs_array; + FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); + m_objs_array.RemoveAll(); + if (!bRet) + m_objs_array.Append(new_objs_array); + + return bRet; +} + +IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage( + int32_t iPage, + IPDF_DataAvail::DownloadHints* pHints) { + if (!m_bAnnotsLoad) { + if (!CheckPageAnnots(iPage, pHints)) + return DataNotAvailable; + m_bAnnotsLoad = TRUE; + } + + DocAvailStatus nRet = CheckLinearizedData(pHints); + if (nRet == DataAvailable) + m_bPageLoadedOK = FALSE; + return nRet; +} + +FX_BOOL CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) { + CFX_AutoRestorer restorer(&s_CurrentDataAvailRecursionDepth); + if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth) + return FALSE; + + CPDF_Object* pParent = pDict->GetElement("Parent"); + if (!pParent) + return FALSE; + + CPDF_Dictionary* pParentDict = pParent->GetDict(); + if (!pParentDict) + return FALSE; + + CPDF_Object* pRet = pParentDict->GetElement("Resources"); + if (pRet) { + m_pPageResource = pRet; + return TRUE; + } + + return HaveResourceAncestor(pParentDict); +} + +IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( + int32_t iPage, + IPDF_DataAvail::DownloadHints* pHints) { + if (!m_pDocument) + return DataError; + + if (IsFirstCheck(iPage)) { + m_bCurPageDictLoadOK = FALSE; + m_bPageLoadedOK = FALSE; + m_bAnnotsLoad = FALSE; + m_bNeedDownLoadResource = FALSE; + m_objs_array.RemoveAll(); + m_ObjectSet.clear(); + } + + if (pdfium::ContainsKey(m_pagesLoadState, iPage)) + return DataAvailable; + + if (m_bLinearized) { + if ((FX_DWORD)iPage == m_dwFirstPageNo) { + DocAvailStatus nRet = CheckLinearizedFirstPage(iPage, pHints); + if (nRet == DataAvailable) + m_pagesLoadState.insert(iPage); + return nRet; + } + + DocAvailStatus nResult = CheckLinearizedData(pHints); + if (nResult != DataAvailable) + return nResult; + + if (m_pHintTables) { + nResult = m_pHintTables->CheckPage(iPage, pHints); + if (nResult != DataAvailable) + return nResult; + m_pagesLoadState.insert(iPage); + return DataAvailable; + } + + if (m_bMainXRefLoadedOK) { + if (m_bTotalLoadPageTree) { + if (!LoadPages(pHints)) + return DataNotAvailable; + } else { + if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) + return DataNotAvailable; + } + } else { + if (!LoadAllFile(pHints)) + return DataNotAvailable; + m_pDocument->GetParser()->RebuildCrossRef(); + ResetFirstCheck(iPage); + return DataAvailable; + } + } else { + if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && + !CheckPage(iPage, pHints)) { + return DataNotAvailable; + } + } + + if (m_bHaveAcroForm && !m_bAcroFormLoad) { + if (!CheckAcroFormSubObject(pHints)) + return DataNotAvailable; + m_bAcroFormLoad = TRUE; + } + + if (!m_bPageLoadedOK) { + if (!m_objs_array.GetSize()) { + m_objs_array.RemoveAll(); + m_ObjectSet.clear(); + + m_pPageDict = m_pDocument->GetPage(iPage); + if (!m_pPageDict) { + ResetFirstCheck(iPage); + return DataAvailable; + } + + CFX_ArrayTemplate obj_array; + obj_array.Add(m_pPageDict); + FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array); + if (!bRet) + return DataNotAvailable; + + m_objs_array.RemoveAll(); + } else { + CFX_ArrayTemplate new_objs_array; + FX_BOOL bRet = + IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); + + m_objs_array.RemoveAll(); + if (!bRet) { + m_objs_array.Append(new_objs_array); + return DataNotAvailable; + } + } + m_bPageLoadedOK = TRUE; + } + + if (!m_bAnnotsLoad) { + if (!CheckPageAnnots(iPage, pHints)) + return DataNotAvailable; + m_bAnnotsLoad = TRUE; + } + + if (m_pPageDict && !m_bNeedDownLoadResource) { + m_pPageResource = m_pPageDict->GetElement("Resources"); + if (!m_pPageResource) + m_bNeedDownLoadResource = HaveResourceAncestor(m_pPageDict); + else + m_bNeedDownLoadResource = TRUE; + } + + if (m_bNeedDownLoadResource) { + FX_BOOL bRet = CheckResources(pHints); + if (!bRet) + return DataNotAvailable; + m_bNeedDownLoadResource = FALSE; + } + + m_bPageLoadedOK = FALSE; + m_bAnnotsLoad = FALSE; + m_bCurPageDictLoadOK = FALSE; + + ResetFirstCheck(iPage); + m_pagesLoadState.insert(iPage); + return DataAvailable; +} + +FX_BOOL CPDF_DataAvail::CheckResources(IPDF_DataAvail::DownloadHints* pHints) { + if (!m_objs_array.GetSize()) { + m_objs_array.RemoveAll(); + CFX_ArrayTemplate obj_array; + obj_array.Add(m_pPageResource); + + FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array); + if (bRet) + m_objs_array.RemoveAll(); + return bRet; + } + + CFX_ArrayTemplate new_objs_array; + FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); + m_objs_array.RemoveAll(); + if (!bRet) + m_objs_array.Append(new_objs_array); + return bRet; +} + +void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, + FX_DWORD* pSize) { + if (pPos) + *pPos = m_dwLastXRefOffset; + if (pSize) + *pSize = (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset); +} + +int CPDF_DataAvail::GetPageCount() const { + if (m_pLinearized) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + CPDF_Object* pObj = pDict ? pDict->GetElementValue("N") : nullptr; + return pObj ? pObj->GetInteger() : 0; + } + return m_pDocument ? m_pDocument->GetPageCount() : 0; +} + +CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { + if (!m_pDocument || index < 0 || index >= GetPageCount()) + return nullptr; + + if (m_pLinearized) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + CPDF_Object* pObj = pDict ? pDict->GetElementValue("P") : nullptr; + + int pageNum = pObj ? pObj->GetInteger() : 0; + if (m_pHintTables && index != pageNum) { + FX_FILESIZE szPageStartPos = 0; + FX_FILESIZE szPageLength = 0; + FX_DWORD dwObjNum = 0; + FX_BOOL bPagePosGot = m_pHintTables->GetPagePos(index, szPageStartPos, + szPageLength, dwObjNum); + if (!bPagePosGot) + return nullptr; + + m_syntaxParser.InitParser(m_pFileRead, (FX_DWORD)szPageStartPos); + CPDF_Object* pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument); + if (!pPageDict) + return nullptr; + + if (!m_pDocument->InsertIndirectObject(dwObjNum, pPageDict)) + return nullptr; + return pPageDict->GetDict(); + } + } + return m_pDocument->GetPage(index); +} + +IPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( + IPDF_DataAvail::DownloadHints* pHints) { + if (!m_pDocument) + return FormAvailable; + + if (!m_bLinearizedFormParamLoad) { + CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); + if (!pRoot) + return FormAvailable; + + CPDF_Object* pAcroForm = pRoot->GetElement("AcroForm"); + if (!pAcroForm) + return FormNotExist; + + DocAvailStatus nDocStatus = CheckLinearizedData(pHints); + if (nDocStatus == DataError) + return FormError; + if (nDocStatus == DataNotAvailable) + return FormNotAvailable; + + if (!m_objs_array.GetSize()) + m_objs_array.Add(pAcroForm->GetDict()); + m_bLinearizedFormParamLoad = TRUE; + } + + CFX_ArrayTemplate new_objs_array; + FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); + m_objs_array.RemoveAll(); + if (!bRet) { + m_objs_array.Append(new_objs_array); + return FormNotAvailable; + } + return FormAvailable; +} + +CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {} + +CPDF_DataAvail::PageNode::~PageNode() { + for (int32_t i = 0; i < m_childNode.GetSize(); ++i) + delete m_childNode[i]; + m_childNode.RemoveAll(); +} -- cgit v1.2.3