// Copyright 2016 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ #define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ #include #include "core/fpdfapi/parser/cpdf_parser.h" #include "core/fpdfapi/parser/cpdf_syntax_parser.h" #include "core/fxcrt/fx_basic.h" class CPDF_Dictionary; class CPDF_HintTables; class CPDF_IndirectObjectHolder; class CPDF_Parser; enum PDF_DATAAVAIL_STATUS { PDF_DATAAVAIL_HEADER = 0, PDF_DATAAVAIL_FIRSTPAGE, PDF_DATAAVAIL_FIRSTPAGE_PREPARE, PDF_DATAAVAIL_HINTTABLE, PDF_DATAAVAIL_END, PDF_DATAAVAIL_CROSSREF, PDF_DATAAVAIL_CROSSREF_ITEM, PDF_DATAAVAIL_CROSSREF_STREAM, PDF_DATAAVAIL_TRAILER, PDF_DATAAVAIL_LOADALLCROSSREF, PDF_DATAAVAIL_ROOT, PDF_DATAAVAIL_INFO, PDF_DATAAVAIL_ACROFORM, PDF_DATAAVAIL_ACROFORM_SUBOBJECT, PDF_DATAAVAIL_PAGETREE, PDF_DATAAVAIL_PAGE, PDF_DATAAVAIL_PAGE_LATERLOAD, PDF_DATAAVAIL_RESOURCES, PDF_DATAAVAIL_DONE, PDF_DATAAVAIL_ERROR, PDF_DATAAVAIL_LOADALLFILE, PDF_DATAAVAIL_TRAILER_APPEND }; enum PDF_PAGENODE_TYPE { PDF_PAGENODE_UNKNOWN = 0, PDF_PAGENODE_PAGE, PDF_PAGENODE_PAGES, PDF_PAGENODE_ARRAY, }; class CPDF_DataAvail final { public: // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts // to make sure the two sets of values match. enum DocAvailStatus { DataError = -1, // PDF_DATA_ERROR DataNotAvailable = 0, // PDF_DATA_NOTAVAIL DataAvailable = 1, // PDF_DATA_AVAIL }; // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts // to make sure the two sets of values match. enum DocLinearizationStatus { LinearizationUnknown = -1, // PDF_LINEARIZATION_UNKNOWN NotLinearized = 0, // PDF_NOT_LINEARIZED Linearized = 1, // PDF_LINEARIZED }; // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts // to make sure the two sets of values match. enum DocFormStatus { FormError = -1, // PDF_FORM_ERROR FormNotAvailable = 0, // PDF_FORM_NOTAVAIL FormAvailable = 1, // PDF_FORM_AVAIL FormNotExist = 2, // PDF_FORM_NOTEXIST }; class FileAvail { public: virtual ~FileAvail(); virtual bool IsDataAvail(FX_FILESIZE offset, uint32_t size) = 0; }; class DownloadHints { public: virtual ~DownloadHints(); virtual void AddSegment(FX_FILESIZE offset, uint32_t size) = 0; }; CPDF_DataAvail(FileAvail* pFileAvail, IFX_SeekableReadStream* pFileRead, bool bSupportHintTable); ~CPDF_DataAvail(); DocAvailStatus IsDocAvail(DownloadHints* pHints); void SetDocument(CPDF_Document* pDoc); DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints); DocFormStatus IsFormAvail(DownloadHints* pHints); DocLinearizationStatus IsLinearizedPDF(); bool IsLinearized(); void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, uint32_t* pSize); IFX_SeekableReadStream* GetFileRead() const { return m_pFileRead; } int GetPageCount() const; CPDF_Dictionary* GetPage(int index); friend class CPDF_HintTables; protected: class PageNode { public: PageNode(); ~PageNode(); PDF_PAGENODE_TYPE m_type; uint32_t m_dwPageNo; CFX_ArrayTemplate m_childNode; }; static const int kMaxDataAvailRecursionDepth = 64; static int s_CurrentDataAvailRecursionDepth; static const int kMaxPageRecursionDepth = 1024; uint32_t GetObjectSize(uint32_t objnum, FX_FILESIZE& offset); bool AreObjectsAvailable(std::vector& obj_array, bool bParsePage, DownloadHints* pHints, std::vector& ret_array); bool CheckDocStatus(DownloadHints* pHints); bool CheckHeader(DownloadHints* pHints); bool CheckFirstPage(DownloadHints* pHints); bool CheckHintTables(DownloadHints* pHints); bool CheckEnd(DownloadHints* pHints); bool CheckCrossRef(DownloadHints* pHints); bool CheckCrossRefItem(DownloadHints* pHints); bool CheckTrailer(DownloadHints* pHints); bool CheckRoot(DownloadHints* pHints); bool CheckInfo(DownloadHints* pHints); bool CheckPages(DownloadHints* pHints); bool CheckPage(DownloadHints* pHints); bool CheckResources(DownloadHints* pHints); bool CheckAnnots(DownloadHints* pHints); bool CheckAcroForm(DownloadHints* pHints); bool CheckAcroFormSubObject(DownloadHints* pHints); bool CheckTrailerAppend(DownloadHints* pHints); bool CheckPageStatus(DownloadHints* pHints); bool CheckAllCrossRefStream(DownloadHints* pHints); int32_t CheckCrossRefStream(DownloadHints* pHints, FX_FILESIZE& xref_offset); bool IsLinearizedFile(uint8_t* pData, uint32_t dwLen); void SetStartOffset(FX_FILESIZE dwOffset); bool GetNextToken(CFX_ByteString& token); bool GetNextChar(uint8_t& ch); CPDF_Object* ParseIndirectObjectAt( FX_FILESIZE pos, uint32_t objnum, CPDF_IndirectObjectHolder* pObjList = nullptr); CPDF_Object* GetObject(uint32_t objnum, DownloadHints* pHints, bool* pExistInFile); bool GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages); bool PreparePageItem(); bool LoadPages(DownloadHints* pHints); bool LoadAllXref(DownloadHints* pHints); bool LoadAllFile(DownloadHints* pHints); DocAvailStatus CheckLinearizedData(DownloadHints* pHints); bool CheckPageAnnots(uint32_t dwPage, DownloadHints* pHints); DocAvailStatus CheckLinearizedFirstPage(uint32_t dwPage, DownloadHints* pHints); bool HaveResourceAncestor(CPDF_Dictionary* pDict); bool CheckPage(uint32_t dwPage, DownloadHints* pHints); bool LoadDocPages(DownloadHints* pHints); bool LoadDocPage(uint32_t dwPage, DownloadHints* pHints); bool CheckPageNode(PageNode& pageNodes, int32_t iPage, int32_t& iCount, DownloadHints* pHints, int level); bool CheckUnkownPageNode(uint32_t dwPageNo, PageNode* pPageNode, DownloadHints* pHints); bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode, DownloadHints* pHints); bool CheckPageCount(DownloadHints* pHints); bool IsFirstCheck(uint32_t dwPage); void ResetFirstCheck(uint32_t dwPage); bool IsDataAvail(FX_FILESIZE offset, uint32_t size, DownloadHints* pHints); FileAvail* const m_pFileAvail; IFX_SeekableReadStream* const m_pFileRead; CPDF_Parser m_parser; CPDF_SyntaxParser m_syntaxParser; CPDF_Object* m_pRoot; uint32_t m_dwRootObjNum; uint32_t m_dwInfoObjNum; CPDF_Object* m_pLinearized; CPDF_Object* m_pTrailer; bool m_bDocAvail; FX_FILESIZE m_dwHeaderOffset; FX_FILESIZE m_dwLastXRefOffset; FX_FILESIZE m_dwXRefOffset; FX_FILESIZE m_dwTrailerOffset; FX_FILESIZE m_dwCurrentOffset; PDF_DATAAVAIL_STATUS m_docStatus; FX_FILESIZE m_dwFileLen; CPDF_Document* m_pDocument; std::set m_ObjectSet; std::vector m_objs_array; FX_FILESIZE m_Pos; FX_FILESIZE m_bufferOffset; uint32_t m_bufferSize; CFX_ByteString m_WordBuf; uint8_t m_bufferData[512]; CFX_ArrayTemplate m_XRefStreamList; CFX_ArrayTemplate m_PageObjList; uint32_t m_PagesObjNum; bool m_bLinearized; uint32_t m_dwFirstPageNo; bool m_bLinearedDataOK; bool m_bMainXRefLoadTried; bool m_bMainXRefLoadedOK; bool m_bPagesTreeLoad; bool m_bPagesLoad; CPDF_Parser* m_pCurrentParser; FX_FILESIZE m_dwCurrentXRefSteam; bool m_bAnnotsLoad; bool m_bHaveAcroForm; uint32_t m_dwAcroFormObjNum; bool m_bAcroFormLoad; CPDF_Object* m_pAcroForm; std::vector m_arrayAcroforms; CPDF_Dictionary* m_pPageDict; CPDF_Object* m_pPageResource; bool m_bNeedDownLoadResource; bool m_bPageLoadedOK; bool m_bLinearizedFormParamLoad; std::vector m_PagesArray; uint32_t m_dwEncryptObjNum; FX_FILESIZE m_dwPrevXRefOffset; bool m_bTotalLoadPageTree; bool m_bCurPageDictLoadOK; PageNode m_pageNodes; std::set m_pageMapCheckState; std::set m_pagesLoadState; std::unique_ptr m_pHintTables; bool m_bSupportHintTable; }; #endif // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_