From 8388037a5c58d60043b11c03a8efe78c54c65a4b Mon Sep 17 00:00:00 2001 From: Tom Sepez Date: Wed, 9 Mar 2016 10:47:45 -0800 Subject: Split off CPDF_Parser and CPDF_SimpleParser into .h/.cpp files R=dsinclair@chromium.org Review URL: https://codereview.chromium.org/1773103003 . --- core/include/fpdfapi/cpdf_parser.h | 173 +++++++++++++++++++++++++++ core/include/fpdfapi/cpdf_simple_parser.h | 35 ++++++ core/include/fpdfapi/fpdf_parser.h | 191 +----------------------------- 3 files changed, 209 insertions(+), 190 deletions(-) create mode 100644 core/include/fpdfapi/cpdf_parser.h create mode 100644 core/include/fpdfapi/cpdf_simple_parser.h (limited to 'core/include/fpdfapi') diff --git a/core/include/fpdfapi/cpdf_parser.h b/core/include/fpdfapi/cpdf_parser.h new file mode 100644 index 0000000000..80a1107743 --- /dev/null +++ b/core/include/fpdfapi/cpdf_parser.h @@ -0,0 +1,173 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_ +#define CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_ + +#include +#include +#include + +#include "core/include/fxcrt/fx_basic.h" + +class CPDF_Array; +class CPDF_CryptoHandler; +class CPDF_Dictionary; +class CPDF_Document; +class CPDF_IndirectObjectHolder; +class CPDF_Object; +class CPDF_StreamAcc; +class CPDF_SyntaxParser; +class IFX_FileRead; +class IPDF_SecurityHandler; + +class CPDF_Parser { + public: + enum Error { + SUCCESS = 0, + FILE_ERROR, + FORMAT_ERROR, + PASSWORD_ERROR, + HANDLER_ERROR + }; + + CPDF_Parser(); + ~CPDF_Parser(); + + Error StartParse(IFX_FileRead* pFile); + FX_DWORD GetPermissions(FX_BOOL bCheckRevision = FALSE); + + void SetPassword(const FX_CHAR* password) { m_Password = password; } + CFX_ByteString GetPassword() { return m_Password; } + CPDF_Dictionary* GetTrailer() const { return m_pTrailer; } + FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } + CPDF_Document* GetDocument() const { return m_pDocument; } + + FX_DWORD GetRootObjNum(); + FX_DWORD GetInfoObjNum(); + CPDF_Array* GetIDArray(); + + CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } + + CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList, + FX_DWORD objnum); + + FX_DWORD GetLastObjNum() const; + bool IsValidObjectNumber(FX_DWORD objnum) const; + FX_FILESIZE GetObjectPositionOrZero(FX_DWORD objnum) const; + uint8_t GetObjectType(FX_DWORD objnum) const; + uint16_t GetObjectGenNum(FX_DWORD objnum) const; + bool IsVersionUpdated() const { return m_bVersionUpdated; } + bool IsObjectFreeOrNull(FX_DWORD objnum) const; + FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm); + CPDF_CryptoHandler* GetCryptoHandler(); + IFX_FileRead* GetFileAccess() const; + + FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const; + FX_FILESIZE GetObjectSize(FX_DWORD objnum) const; + + void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size); + int GetFileVersion() const { return m_FileVersion; } + FX_BOOL IsXRefStream() const { return m_bXRefStream; } + + CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + FX_DWORD objnum); + + CPDF_Object* ParseIndirectObjectAtByStrict( + CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + FX_DWORD objnum, + FX_FILESIZE* pResultPos); + + Error StartAsyncParse(IFX_FileRead* pFile); + + FX_DWORD GetFirstPageNo() const { return m_dwFirstPageNo; } + + protected: + struct ObjectInfo { + ObjectInfo() : pos(0), type(0), gennum(0) {} + + FX_FILESIZE pos; + uint8_t type; + uint16_t gennum; + }; + + void CloseParser(); + CPDF_Object* ParseDirect(CPDF_Object* pObj); + FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos); + FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos); + bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip); + FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef); + CPDF_Dictionary* LoadTrailerV4(); + FX_BOOL RebuildCrossRef(); + Error SetEncryptHandler(); + void ReleaseEncryptHandler(); + FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount); + FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount); + FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); + Error LoadLinearizedMainXRefTable(); + CPDF_StreamAcc* GetObjectStream(FX_DWORD number); + FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset); + void SetEncryptDictionary(CPDF_Dictionary* pDict); + void ShrinkObjectMap(FX_DWORD size); + + CPDF_Document* m_pDocument; + std::unique_ptr m_pSyntax; + bool m_bOwnFileRead; + int m_FileVersion; + CPDF_Dictionary* m_pTrailer; + CPDF_Dictionary* m_pEncryptDict; + FX_FILESIZE m_LastXRefOffset; + FX_BOOL m_bXRefStream; + std::unique_ptr m_pSecurityHandler; + CFX_ByteString m_bsRecipient; + CFX_ByteString m_FilePath; + CFX_ByteString m_Password; + std::map m_ObjectInfo; + std::set m_SortedOffset; + CFX_ArrayTemplate m_Trailers; + FX_BOOL m_bVersionUpdated; + CPDF_Object* m_pLinearized; + FX_DWORD m_dwFirstPageNo; + FX_DWORD m_dwXrefStartObjNum; + + // A map of object numbers to indirect streams. Map owns the streams. + std::map> m_ObjectStreamMap; + + // Mapping of object numbers to offsets. The offsets are relative to the first + // object in the stream. + using StreamObjectCache = std::map; + + // Mapping of streams to their object caches. This is valid as long as the + // streams in |m_ObjectStreamMap| are valid. + std::map m_ObjCache; + + // All indirect object numbers that are being parsed. + std::set m_ParsingObjNums; + + friend class CPDF_DataAvail; + + private: + enum class ParserState { + kDefault, + kComment, + kWhitespace, + kString, + kHexString, + kEscapedString, + kXref, + kObjNum, + kPostObjNum, + kGenNum, + kPostGenNum, + kTrailer, + kBeginObj, + kEndObj + }; +}; + +#endif // CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_ diff --git a/core/include/fpdfapi/cpdf_simple_parser.h b/core/include/fpdfapi/cpdf_simple_parser.h new file mode 100644 index 0000000000..dc1441d18e --- /dev/null +++ b/core/include/fpdfapi/cpdf_simple_parser.h @@ -0,0 +1,35 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_INCLUDE_FPDFAPI_CPDF_SIMPLE_PARSER_H_ +#define CORE_INCLUDE_FPDFAPI_CPDF_SIMPLE_PARSER_H_ + +#include "core/include/fxcrt/fx_string.h" +#include "core/include/fxcrt/fx_system.h" + +class CPDF_SimpleParser { + public: + CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize); + CPDF_SimpleParser(const CFX_ByteStringC& str); + + CFX_ByteStringC GetWord(); + + // Find the token and its |nParams| parameters from the start of data, + // and move the current position to the start of those parameters. + bool FindTagParamFromStart(const CFX_ByteStringC& token, int nParams); + + // For testing only. + FX_DWORD GetCurPos() const { return m_dwCurPos; } + + private: + void ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize); + + const uint8_t* m_pData; + FX_DWORD m_dwSize; + FX_DWORD m_dwCurPos; +}; + +#endif // CORE_INCLUDE_FPDFAPI_CPDF_SIMPLE_PARSER_H_ diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h index ec7a98cb91..06030701b4 100644 --- a/core/include/fpdfapi/fpdf_parser.h +++ b/core/include/fpdfapi/fpdf_parser.h @@ -1,4 +1,4 @@ -// Copyright 2014 PDFium Authors. All rights reserved. +// Copyright 2016 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -47,26 +47,6 @@ class IPDF_SecurityHandler; using ScopedFileStream = std::unique_ptr>; -// Use the accessors below instead of directly accessing PDF_CharType. -extern const char PDF_CharType[256]; - -inline bool PDFCharIsWhitespace(uint8_t c) { - return PDF_CharType[c] == 'W'; -} -inline bool PDFCharIsNumeric(uint8_t c) { - return PDF_CharType[c] == 'N'; -} -inline bool PDFCharIsDelimiter(uint8_t c) { - return PDF_CharType[c] == 'D'; -} -inline bool PDFCharIsOther(uint8_t c) { - return PDF_CharType[c] == 'R'; -} - -inline bool PDFCharIsLineEnding(uint8_t c) { - return c == '\r' || c == '\n'; -} - template class ScopedSetInsertion { public: @@ -84,175 +64,6 @@ class ScopedSetInsertion { // Indexed by 8-bit char code, contains unicode code points. extern const FX_WORD PDFDocEncoding[256]; - -class CPDF_SimpleParser { - public: - CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize); - CPDF_SimpleParser(const CFX_ByteStringC& str); - - CFX_ByteStringC GetWord(); - - // Find the token and its |nParams| parameters from the start of data, - // and move the current position to the start of those parameters. - bool FindTagParamFromStart(const CFX_ByteStringC& token, int nParams); - - // For testing only. - FX_DWORD GetCurPos() const { return m_dwCurPos; } - - private: - void ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize); - - const uint8_t* m_pData; - FX_DWORD m_dwSize; - FX_DWORD m_dwCurPos; -}; - -class CPDF_Parser { - public: - enum Error { - SUCCESS = 0, - FILE_ERROR, - FORMAT_ERROR, - PASSWORD_ERROR, - HANDLER_ERROR - }; - - CPDF_Parser(); - ~CPDF_Parser(); - - Error StartParse(IFX_FileRead* pFile); - FX_DWORD GetPermissions(FX_BOOL bCheckRevision = FALSE); - - void SetPassword(const FX_CHAR* password) { m_Password = password; } - CFX_ByteString GetPassword() { return m_Password; } - CPDF_Dictionary* GetTrailer() const { return m_pTrailer; } - FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } - CPDF_Document* GetDocument() const { return m_pDocument; } - - FX_DWORD GetRootObjNum(); - FX_DWORD GetInfoObjNum(); - CPDF_Array* GetIDArray(); - - CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } - - CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList, - FX_DWORD objnum); - - FX_DWORD GetLastObjNum() const; - bool IsValidObjectNumber(FX_DWORD objnum) const; - FX_FILESIZE GetObjectPositionOrZero(FX_DWORD objnum) const; - uint8_t GetObjectType(FX_DWORD objnum) const; - uint16_t GetObjectGenNum(FX_DWORD objnum) const; - bool IsVersionUpdated() const { return m_bVersionUpdated; } - bool IsObjectFreeOrNull(FX_DWORD objnum) const; - FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm); - CPDF_CryptoHandler* GetCryptoHandler(); - IFX_FileRead* GetFileAccess() const; - - FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const; - FX_FILESIZE GetObjectSize(FX_DWORD objnum) const; - - void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size); - int GetFileVersion() const { return m_FileVersion; } - FX_BOOL IsXRefStream() const { return m_bXRefStream; } - - CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList, - FX_FILESIZE pos, - FX_DWORD objnum); - - CPDF_Object* ParseIndirectObjectAtByStrict( - CPDF_IndirectObjectHolder* pObjList, - FX_FILESIZE pos, - FX_DWORD objnum, - FX_FILESIZE* pResultPos); - - Error StartAsyncParse(IFX_FileRead* pFile); - - FX_DWORD GetFirstPageNo() const { return m_dwFirstPageNo; } - - protected: - struct ObjectInfo { - ObjectInfo() : pos(0), type(0), gennum(0) {} - - FX_FILESIZE pos; - uint8_t type; - uint16_t gennum; - }; - - void CloseParser(); - CPDF_Object* ParseDirect(CPDF_Object* pObj); - FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos); - FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos); - bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip); - FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef); - CPDF_Dictionary* LoadTrailerV4(); - FX_BOOL RebuildCrossRef(); - Error SetEncryptHandler(); - void ReleaseEncryptHandler(); - FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount); - FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount); - FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); - Error LoadLinearizedMainXRefTable(); - CPDF_StreamAcc* GetObjectStream(FX_DWORD number); - FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset); - void SetEncryptDictionary(CPDF_Dictionary* pDict); - void ShrinkObjectMap(FX_DWORD size); - - CPDF_Document* m_pDocument; - std::unique_ptr m_pSyntax; - bool m_bOwnFileRead; - int m_FileVersion; - CPDF_Dictionary* m_pTrailer; - CPDF_Dictionary* m_pEncryptDict; - FX_FILESIZE m_LastXRefOffset; - FX_BOOL m_bXRefStream; - std::unique_ptr m_pSecurityHandler; - CFX_ByteString m_bsRecipient; - CFX_ByteString m_FilePath; - CFX_ByteString m_Password; - std::map m_ObjectInfo; - std::set m_SortedOffset; - CFX_ArrayTemplate m_Trailers; - FX_BOOL m_bVersionUpdated; - CPDF_Object* m_pLinearized; - FX_DWORD m_dwFirstPageNo; - FX_DWORD m_dwXrefStartObjNum; - - // A map of object numbers to indirect streams. Map owns the streams. - std::map> m_ObjectStreamMap; - - // Mapping of object numbers to offsets. The offsets are relative to the first - // object in the stream. - using StreamObjectCache = std::map; - - // Mapping of streams to their object caches. This is valid as long as the - // streams in |m_ObjectStreamMap| are valid. - std::map m_ObjCache; - - // All indirect object numbers that are being parsed. - std::set m_ParsingObjNums; - - friend class CPDF_DataAvail; - - private: - enum class ParserState { - kDefault, - kComment, - kWhitespace, - kString, - kHexString, - kEscapedString, - kXref, - kObjNum, - kPostObjNum, - kGenNum, - kPostGenNum, - kTrailer, - kBeginObj, - kEndObj - }; -}; - #define FXCIPHER_NONE 0 #define FXCIPHER_RC4 1 #define FXCIPHER_AES 2 -- cgit v1.2.3