From 488b7ad845d6de212d89cd957303b294ecfa5922 Mon Sep 17 00:00:00 2001 From: dsinclair Date: Tue, 4 Oct 2016 11:55:50 -0700 Subject: Move core/fpdfapi/fpdf_parser to core/fpdfapi/parser BUG=pdfium:603 Review-Url: https://codereview.chromium.org/2392603004 --- core/fpdfapi/parser/cpdf_parser.h | 177 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 core/fpdfapi/parser/cpdf_parser.h (limited to 'core/fpdfapi/parser/cpdf_parser.h') diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h new file mode 100644 index 0000000000..876ea6cb3d --- /dev/null +++ b/core/fpdfapi/parser/cpdf_parser.h @@ -0,0 +1,177 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ +#define CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ + +#include +#include +#include + +#include "core/fxcrt/fx_basic.h" + +class CPDF_Array; +class CPDF_CryptoHandler; +class CPDF_Dictionary; +class CPDF_Document; +class CPDF_IndirectObjectHolder; +class CPDF_Object; +class CPDF_SecurityHandler; +class CPDF_StreamAcc; +class CPDF_SyntaxParser; +class IFX_FileRead; + +class CPDF_Parser { + public: + enum Error { + SUCCESS = 0, + FILE_ERROR, + FORMAT_ERROR, + PASSWORD_ERROR, + HANDLER_ERROR + }; + + // A limit on the maximum object number in the xref table. Theoretical limits + // are higher, but this may be large enough in practice. + static const uint32_t kMaxObjectNumber = 1048576; + + CPDF_Parser(); + ~CPDF_Parser(); + + Error StartParse(IFX_FileRead* pFile, CPDF_Document* pDocument); + Error StartLinearizedParse(IFX_FileRead* pFile, CPDF_Document* pDocument); + + void SetPassword(const FX_CHAR* password) { m_Password = password; } + CFX_ByteString GetPassword() { return m_Password; } + CPDF_Dictionary* GetTrailer() const { return m_pTrailer; } + FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } + + uint32_t GetPermissions() const; + uint32_t GetRootObjNum(); + uint32_t GetInfoObjNum(); + CPDF_Array* GetIDArray(); + + CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } + + CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList, + uint32_t objnum); + + uint32_t GetLastObjNum() const; + bool IsValidObjectNumber(uint32_t objnum) const; + FX_FILESIZE GetObjectPositionOrZero(uint32_t objnum) const; + uint8_t GetObjectType(uint32_t objnum) const; + uint16_t GetObjectGenNum(uint32_t objnum) const; + bool IsVersionUpdated() const { return m_bVersionUpdated; } + bool IsObjectFreeOrNull(uint32_t objnum) const; + CPDF_CryptoHandler* GetCryptoHandler(); + IFX_FileRead* GetFileAccess() const; + + FX_FILESIZE GetObjectOffset(uint32_t objnum) const; + FX_FILESIZE GetObjectSize(uint32_t objnum) const; + + void GetIndirectBinary(uint32_t objnum, uint8_t*& pBuffer, uint32_t& size); + int GetFileVersion() const { return m_FileVersion; } + FX_BOOL IsXRefStream() const { return m_bXRefStream; } + + CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + uint32_t objnum); + + CPDF_Object* ParseIndirectObjectAtByStrict( + CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + uint32_t objnum, + FX_FILESIZE* pResultPos); + + uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; } + + protected: + struct ObjectInfo { + ObjectInfo() : pos(0), type(0), gennum(0) {} + + FX_FILESIZE pos; + uint8_t type; + uint16_t gennum; + }; + + std::unique_ptr m_pSyntax; + std::map m_ObjectInfo; + + bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip); + FX_BOOL RebuildCrossRef(); + + private: + friend class CPDF_DataAvail; + + enum class ParserState { + kDefault, + kComment, + kWhitespace, + kString, + kHexString, + kEscapedString, + kXref, + kObjNum, + kPostObjNum, + kGenNum, + kPostGenNum, + kTrailer, + kBeginObj, + kEndObj + }; + + CPDF_Object* ParseDirect(CPDF_Object* pObj); + FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos); + FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos); + FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef); + CPDF_Dictionary* LoadTrailerV4(); + Error SetEncryptHandler(); + void ReleaseEncryptHandler(); + FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); + FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); + FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); + Error LoadLinearizedMainXRefTable(); + CPDF_StreamAcc* GetObjectStream(uint32_t number); + FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, uint32_t offset); + void SetEncryptDictionary(CPDF_Dictionary* pDict); + void ShrinkObjectMap(uint32_t size); + // A simple check whether the cross reference table matches with + // the objects. + bool VerifyCrossRefV4(); + + CPDF_Document* m_pDocument; // not owned + bool m_bHasParsed; + bool m_bOwnFileRead; + int m_FileVersion; + CPDF_Dictionary* m_pTrailer; + CPDF_Dictionary* m_pEncryptDict; + FX_FILESIZE m_LastXRefOffset; + FX_BOOL m_bXRefStream; + std::unique_ptr m_pSecurityHandler; + CFX_ByteString m_Password; + std::set m_SortedOffset; + CFX_ArrayTemplate m_Trailers; + bool m_bVersionUpdated; + CPDF_Object* m_pLinearized; + uint32_t m_dwFirstPageNo; + uint32_t m_dwXrefStartObjNum; + + // A map of object numbers to indirect streams. Map owns the streams. + std::map> m_ObjectStreamMap; + + // Mapping of object numbers to offsets. The offsets are relative to the first + // object in the stream. + using StreamObjectCache = std::map; + + // Mapping of streams to their object caches. This is valid as long as the + // streams in |m_ObjectStreamMap| are valid. + std::map m_ObjCache; + + // All indirect object numbers that are being parsed. + std::set m_ParsingObjNums; +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ -- cgit v1.2.3