diff options
Diffstat (limited to 'core/fpdfapi/parser/cpdf_parser.h')
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser.h | 177 |
1 files changed, 177 insertions, 0 deletions
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h new file mode 100644 index 0000000000..876ea6cb3d --- /dev/null +++ b/core/fpdfapi/parser/cpdf_parser.h @@ -0,0 +1,177 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ +#define CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ + +#include <map> +#include <memory> +#include <set> + +#include "core/fxcrt/fx_basic.h" + +class CPDF_Array; +class CPDF_CryptoHandler; +class CPDF_Dictionary; +class CPDF_Document; +class CPDF_IndirectObjectHolder; +class CPDF_Object; +class CPDF_SecurityHandler; +class CPDF_StreamAcc; +class CPDF_SyntaxParser; +class IFX_FileRead; + +class CPDF_Parser { + public: + enum Error { + SUCCESS = 0, + FILE_ERROR, + FORMAT_ERROR, + PASSWORD_ERROR, + HANDLER_ERROR + }; + + // A limit on the maximum object number in the xref table. Theoretical limits + // are higher, but this may be large enough in practice. + static const uint32_t kMaxObjectNumber = 1048576; + + CPDF_Parser(); + ~CPDF_Parser(); + + Error StartParse(IFX_FileRead* pFile, CPDF_Document* pDocument); + Error StartLinearizedParse(IFX_FileRead* pFile, CPDF_Document* pDocument); + + void SetPassword(const FX_CHAR* password) { m_Password = password; } + CFX_ByteString GetPassword() { return m_Password; } + CPDF_Dictionary* GetTrailer() const { return m_pTrailer; } + FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } + + uint32_t GetPermissions() const; + uint32_t GetRootObjNum(); + uint32_t GetInfoObjNum(); + CPDF_Array* GetIDArray(); + + CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } + + CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList, + uint32_t objnum); + + uint32_t GetLastObjNum() const; + bool IsValidObjectNumber(uint32_t objnum) const; + FX_FILESIZE GetObjectPositionOrZero(uint32_t objnum) const; + uint8_t GetObjectType(uint32_t objnum) const; + uint16_t GetObjectGenNum(uint32_t objnum) const; + bool IsVersionUpdated() const { return m_bVersionUpdated; } + bool IsObjectFreeOrNull(uint32_t objnum) const; + CPDF_CryptoHandler* GetCryptoHandler(); + IFX_FileRead* GetFileAccess() const; + + FX_FILESIZE GetObjectOffset(uint32_t objnum) const; + FX_FILESIZE GetObjectSize(uint32_t objnum) const; + + void GetIndirectBinary(uint32_t objnum, uint8_t*& pBuffer, uint32_t& size); + int GetFileVersion() const { return m_FileVersion; } + FX_BOOL IsXRefStream() const { return m_bXRefStream; } + + CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + uint32_t objnum); + + CPDF_Object* ParseIndirectObjectAtByStrict( + CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + uint32_t objnum, + FX_FILESIZE* pResultPos); + + uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; } + + protected: + struct ObjectInfo { + ObjectInfo() : pos(0), type(0), gennum(0) {} + + FX_FILESIZE pos; + uint8_t type; + uint16_t gennum; + }; + + std::unique_ptr<CPDF_SyntaxParser> m_pSyntax; + std::map<uint32_t, ObjectInfo> m_ObjectInfo; + + bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip); + FX_BOOL RebuildCrossRef(); + + private: + friend class CPDF_DataAvail; + + enum class ParserState { + kDefault, + kComment, + kWhitespace, + kString, + kHexString, + kEscapedString, + kXref, + kObjNum, + kPostObjNum, + kGenNum, + kPostGenNum, + kTrailer, + kBeginObj, + kEndObj + }; + + CPDF_Object* ParseDirect(CPDF_Object* pObj); + FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos); + FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos); + FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef); + CPDF_Dictionary* LoadTrailerV4(); + Error SetEncryptHandler(); + void ReleaseEncryptHandler(); + FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); + FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); + FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); + Error LoadLinearizedMainXRefTable(); + CPDF_StreamAcc* GetObjectStream(uint32_t number); + FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, uint32_t offset); + void SetEncryptDictionary(CPDF_Dictionary* pDict); + void ShrinkObjectMap(uint32_t size); + // A simple check whether the cross reference table matches with + // the objects. + bool VerifyCrossRefV4(); + + CPDF_Document* m_pDocument; // not owned + bool m_bHasParsed; + bool m_bOwnFileRead; + int m_FileVersion; + CPDF_Dictionary* m_pTrailer; + CPDF_Dictionary* m_pEncryptDict; + FX_FILESIZE m_LastXRefOffset; + FX_BOOL m_bXRefStream; + std::unique_ptr<CPDF_SecurityHandler> m_pSecurityHandler; + CFX_ByteString m_Password; + std::set<FX_FILESIZE> m_SortedOffset; + CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers; + bool m_bVersionUpdated; + CPDF_Object* m_pLinearized; + uint32_t m_dwFirstPageNo; + uint32_t m_dwXrefStartObjNum; + + // A map of object numbers to indirect streams. Map owns the streams. + std::map<uint32_t, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap; + + // Mapping of object numbers to offsets. The offsets are relative to the first + // object in the stream. + using StreamObjectCache = std::map<uint32_t, uint32_t>; + + // Mapping of streams to their object caches. This is valid as long as the + // streams in |m_ObjectStreamMap| are valid. + std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache; + + // All indirect object numbers that are being parsed. + std::set<uint32_t> m_ParsingObjNums; +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ |