diff options
Diffstat (limited to 'core/include/fpdfapi/cpdf_parser.h')
-rw-r--r-- | core/include/fpdfapi/cpdf_parser.h | 173 |
1 files changed, 173 insertions, 0 deletions
diff --git a/core/include/fpdfapi/cpdf_parser.h b/core/include/fpdfapi/cpdf_parser.h new file mode 100644 index 0000000000..80a1107743 --- /dev/null +++ b/core/include/fpdfapi/cpdf_parser.h @@ -0,0 +1,173 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_ +#define CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_ + +#include <map> +#include <memory> +#include <set> + +#include "core/include/fxcrt/fx_basic.h" + +class CPDF_Array; +class CPDF_CryptoHandler; +class CPDF_Dictionary; +class CPDF_Document; +class CPDF_IndirectObjectHolder; +class CPDF_Object; +class CPDF_StreamAcc; +class CPDF_SyntaxParser; +class IFX_FileRead; +class IPDF_SecurityHandler; + +class CPDF_Parser { + public: + enum Error { + SUCCESS = 0, + FILE_ERROR, + FORMAT_ERROR, + PASSWORD_ERROR, + HANDLER_ERROR + }; + + CPDF_Parser(); + ~CPDF_Parser(); + + Error StartParse(IFX_FileRead* pFile); + FX_DWORD GetPermissions(FX_BOOL bCheckRevision = FALSE); + + void SetPassword(const FX_CHAR* password) { m_Password = password; } + CFX_ByteString GetPassword() { return m_Password; } + CPDF_Dictionary* GetTrailer() const { return m_pTrailer; } + FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } + CPDF_Document* GetDocument() const { return m_pDocument; } + + FX_DWORD GetRootObjNum(); + FX_DWORD GetInfoObjNum(); + CPDF_Array* GetIDArray(); + + CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } + + CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList, + FX_DWORD objnum); + + FX_DWORD GetLastObjNum() const; + bool IsValidObjectNumber(FX_DWORD objnum) const; + FX_FILESIZE GetObjectPositionOrZero(FX_DWORD objnum) const; + uint8_t GetObjectType(FX_DWORD objnum) const; + uint16_t GetObjectGenNum(FX_DWORD objnum) const; + bool IsVersionUpdated() const { return m_bVersionUpdated; } + bool IsObjectFreeOrNull(FX_DWORD objnum) const; + FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm); + CPDF_CryptoHandler* GetCryptoHandler(); + IFX_FileRead* GetFileAccess() const; + + FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const; + FX_FILESIZE GetObjectSize(FX_DWORD objnum) const; + + void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size); + int GetFileVersion() const { return m_FileVersion; } + FX_BOOL IsXRefStream() const { return m_bXRefStream; } + + CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + FX_DWORD objnum); + + CPDF_Object* ParseIndirectObjectAtByStrict( + CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + FX_DWORD objnum, + FX_FILESIZE* pResultPos); + + Error StartAsyncParse(IFX_FileRead* pFile); + + FX_DWORD GetFirstPageNo() const { return m_dwFirstPageNo; } + + protected: + struct ObjectInfo { + ObjectInfo() : pos(0), type(0), gennum(0) {} + + FX_FILESIZE pos; + uint8_t type; + uint16_t gennum; + }; + + void CloseParser(); + CPDF_Object* ParseDirect(CPDF_Object* pObj); + FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos); + FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos); + bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip); + FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef); + CPDF_Dictionary* LoadTrailerV4(); + FX_BOOL RebuildCrossRef(); + Error SetEncryptHandler(); + void ReleaseEncryptHandler(); + FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount); + FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount); + FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); + Error LoadLinearizedMainXRefTable(); + CPDF_StreamAcc* GetObjectStream(FX_DWORD number); + FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset); + void SetEncryptDictionary(CPDF_Dictionary* pDict); + void ShrinkObjectMap(FX_DWORD size); + + CPDF_Document* m_pDocument; + std::unique_ptr<CPDF_SyntaxParser> m_pSyntax; + bool m_bOwnFileRead; + int m_FileVersion; + CPDF_Dictionary* m_pTrailer; + CPDF_Dictionary* m_pEncryptDict; + FX_FILESIZE m_LastXRefOffset; + FX_BOOL m_bXRefStream; + std::unique_ptr<IPDF_SecurityHandler> m_pSecurityHandler; + CFX_ByteString m_bsRecipient; + CFX_ByteString m_FilePath; + CFX_ByteString m_Password; + std::map<FX_DWORD, ObjectInfo> m_ObjectInfo; + std::set<FX_FILESIZE> m_SortedOffset; + CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers; + FX_BOOL m_bVersionUpdated; + CPDF_Object* m_pLinearized; + FX_DWORD m_dwFirstPageNo; + FX_DWORD m_dwXrefStartObjNum; + + // A map of object numbers to indirect streams. Map owns the streams. + std::map<FX_DWORD, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap; + + // Mapping of object numbers to offsets. The offsets are relative to the first + // object in the stream. + using StreamObjectCache = std::map<FX_DWORD, FX_DWORD>; + + // Mapping of streams to their object caches. This is valid as long as the + // streams in |m_ObjectStreamMap| are valid. + std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache; + + // All indirect object numbers that are being parsed. + std::set<FX_DWORD> m_ParsingObjNums; + + friend class CPDF_DataAvail; + + private: + enum class ParserState { + kDefault, + kComment, + kWhitespace, + kString, + kHexString, + kEscapedString, + kXref, + kObjNum, + kPostObjNum, + kGenNum, + kPostGenNum, + kTrailer, + kBeginObj, + kEndObj + }; +}; + +#endif // CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_ |