summary | refs | log | tree | commit | diff
path: root/core/include
diff options
context:
space:
mode:
author: Tom Sepez <tsepez@chromium.org> 2016-03-09 10:47:45 -0800
committer: Tom Sepez <tsepez@chromium.org> 2016-03-09 10:47:45 -0800
commit: 8388037a5c58d60043b11c03a8efe78c54c65a4b (patch)
tree: d9348e8842aa1f8888bcbd2590d2ae7bee1b6db7 /core/include
parent: f1fa151e146d70c5e031726581e176a8c7d0d579 (diff)
download: pdfium-8388037a5c58d60043b11c03a8efe78c54c65a4b.tar.xz
Split off CPDF_Parser and CPDF_SimpleParser into .h/.cpp files
R=dsinclair@chromium.org Review URL: https://codereview.chromium.org/1773103003 .
Diffstat (limited to 'core/include')
-rw-r--r-- core/include/fpdfapi/cpdf_parser.h 173
-rw-r--r-- core/include/fpdfapi/cpdf_simple_parser.h 35
-rw-r--r-- core/include/fpdfapi/fpdf_parser.h 191
3 files changed, 209 insertions, 190 deletions
diff --git a/core/include/fpdfapi/cpdf_parser.h b/core/include/fpdfapi/cpdf_parser.h
new file mode 100644
index 0000000000..80a1107743
--- /dev/null
+++ b/core/include/fpdfapi/cpdf_parser.h
@@ -0,0 +1,173 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_
+#define CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_
+
+#include <map>
+#include <memory>
+#include <set>
+
+#include "core/include/fxcrt/fx_basic.h"
+
+class CPDF_Array;
+class CPDF_CryptoHandler;
+class CPDF_Dictionary;
+class CPDF_Document;
+class CPDF_IndirectObjectHolder;
+class CPDF_Object;
+class CPDF_StreamAcc;
+class CPDF_SyntaxParser;
+class IFX_FileRead;
+class IPDF_SecurityHandler;
+
+class CPDF_Parser {
+ public:
+ enum Error {
+ SUCCESS = 0,
+ FILE_ERROR,
+ FORMAT_ERROR,
+ PASSWORD_ERROR,
+ HANDLER_ERROR
+ };
+
+ CPDF_Parser();
+ ~CPDF_Parser();
+
+ Error StartParse(IFX_FileRead* pFile);
+ FX_DWORD GetPermissions(FX_BOOL bCheckRevision = FALSE);
+
+ void SetPassword(const FX_CHAR* password) { m_Password = password; }
+ CFX_ByteString GetPassword() { return m_Password; }
+ CPDF_Dictionary* GetTrailer() const { return m_pTrailer; }
+ FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; }
+ CPDF_Document* GetDocument() const { return m_pDocument; }
+
+ FX_DWORD GetRootObjNum();
+ FX_DWORD GetInfoObjNum();
+ CPDF_Array* GetIDArray();
+
+ CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; }
+
+ CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList,
+ FX_DWORD objnum);
+
+ FX_DWORD GetLastObjNum() const;
+ bool IsValidObjectNumber(FX_DWORD objnum) const;
+ FX_FILESIZE GetObjectPositionOrZero(FX_DWORD objnum) const;
+ uint8_t GetObjectType(FX_DWORD objnum) const;
+ uint16_t GetObjectGenNum(FX_DWORD objnum) const;
+ bool IsVersionUpdated() const { return m_bVersionUpdated; }
+ bool IsObjectFreeOrNull(FX_DWORD objnum) const;
+ FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm);
+ CPDF_CryptoHandler* GetCryptoHandler();
+ IFX_FileRead* GetFileAccess() const;
+
+ FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const;
+ FX_FILESIZE GetObjectSize(FX_DWORD objnum) const;
+
+ void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size);
+ int GetFileVersion() const { return m_FileVersion; }
+ FX_BOOL IsXRefStream() const { return m_bXRefStream; }
+
+ CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList,
+ FX_FILESIZE pos,
+ FX_DWORD objnum);
+
+ CPDF_Object* ParseIndirectObjectAtByStrict(
+ CPDF_IndirectObjectHolder* pObjList,
+ FX_FILESIZE pos,
+ FX_DWORD objnum,
+ FX_FILESIZE* pResultPos);
+
+ Error StartAsyncParse(IFX_FileRead* pFile);
+
+ FX_DWORD GetFirstPageNo() const { return m_dwFirstPageNo; }
+
+ protected:
+ struct ObjectInfo {
+ ObjectInfo() : pos(0), type(0), gennum(0) {}
+
+ FX_FILESIZE pos;
+ uint8_t type;
+ uint16_t gennum;
+ };
+
+ void CloseParser();
+ CPDF_Object* ParseDirect(CPDF_Object* pObj);
+ FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos);
+ FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos);
+ bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip);
+ FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef);
+ CPDF_Dictionary* LoadTrailerV4();
+ FX_BOOL RebuildCrossRef();
+ Error SetEncryptHandler();
+ void ReleaseEncryptHandler();
+ FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount);
+ FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount);
+ FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos);
+ Error LoadLinearizedMainXRefTable();
+ CPDF_StreamAcc* GetObjectStream(FX_DWORD number);
+ FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset);
+ void SetEncryptDictionary(CPDF_Dictionary* pDict);
+ void ShrinkObjectMap(FX_DWORD size);
+
+ CPDF_Document* m_pDocument;
+ std::unique_ptr<CPDF_SyntaxParser> m_pSyntax;
+ bool m_bOwnFileRead;
+ int m_FileVersion;
+ CPDF_Dictionary* m_pTrailer;
+ CPDF_Dictionary* m_pEncryptDict;
+ FX_FILESIZE m_LastXRefOffset;
+ FX_BOOL m_bXRefStream;
+ std::unique_ptr<IPDF_SecurityHandler> m_pSecurityHandler;
+ CFX_ByteString m_bsRecipient;
+ CFX_ByteString m_FilePath;
+ CFX_ByteString m_Password;
+ std::map<FX_DWORD, ObjectInfo> m_ObjectInfo;
+ std::set<FX_FILESIZE> m_SortedOffset;
+ CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers;
+ FX_BOOL m_bVersionUpdated;
+ CPDF_Object* m_pLinearized;
+ FX_DWORD m_dwFirstPageNo;
+ FX_DWORD m_dwXrefStartObjNum;
+
+ // A map of object numbers to indirect streams. Map owns the streams.
+ std::map<FX_DWORD, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap;
+
+ // Mapping of object numbers to offsets. The offsets are relative to the first
+ // object in the stream.
+ using StreamObjectCache = std::map<FX_DWORD, FX_DWORD>;
+
+ // Mapping of streams to their object caches. This is valid as long as the
+ // streams in |m_ObjectStreamMap| are valid.
+ std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache;
+
+ // All indirect object numbers that are being parsed.
+ std::set<FX_DWORD> m_ParsingObjNums;
+
+ friend class CPDF_DataAvail;
+
+ private:
+ enum class ParserState {
+ kDefault,
+ kComment,
+ kWhitespace,
+ kString,
+ kHexString,
+ kEscapedString,
+ kXref,
+ kObjNum,
+ kPostObjNum,
+ kGenNum,
+ kPostGenNum,
+ kTrailer,
+ kBeginObj,
+ kEndObj
+ };
+};
+
+#endif // CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_
diff --git a/core/include/fpdfapi/cpdf_simple_parser.h b/core/include/fpdfapi/cpdf_simple_parser.h
new file mode 100644
index 0000000000..dc1441d18e
--- /dev/null
+++ b/core/include/fpdfapi/cpdf_simple_parser.h
@@ -0,0 +1,35 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_INCLUDE_FPDFAPI_CPDF_SIMPLE_PARSER_H_
+#define CORE_INCLUDE_FPDFAPI_CPDF_SIMPLE_PARSER_H_
+
+#include "core/include/fxcrt/fx_string.h"
+#include "core/include/fxcrt/fx_system.h"
+
+class CPDF_SimpleParser {
+ public:
+ CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize);
+ CPDF_SimpleParser(const CFX_ByteStringC& str);
+
+ CFX_ByteStringC GetWord();
+
+ // Find the token and its |nParams| parameters from the start of data,
+ // and move the current position to the start of those parameters.
+ bool FindTagParamFromStart(const CFX_ByteStringC& token, int nParams);
+
+ // For testing only.
+ FX_DWORD GetCurPos() const { return m_dwCurPos; }
+
+ private:
+ void ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize);
+
+ const uint8_t* m_pData;
+ FX_DWORD m_dwSize;
+ FX_DWORD m_dwCurPos;
+};
+
+#endif // CORE_INCLUDE_FPDFAPI_CPDF_SIMPLE_PARSER_H_
diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h
index ec7a98cb91..06030701b4 100644
--- a/core/include/fpdfapi/fpdf_parser.h
+++ b/core/include/fpdfapi/fpdf_parser.h
@@ -1,4 +1,4 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
+// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -47,26 +47,6 @@ class IPDF_SecurityHandler;
using ScopedFileStream =
std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>;
-// Use the accessors below instead of directly accessing PDF_CharType.
-extern const char PDF_CharType[256];
-
-inline bool PDFCharIsWhitespace(uint8_t c) {
- return PDF_CharType[c] == 'W';
-}
-inline bool PDFCharIsNumeric(uint8_t c) {
- return PDF_CharType[c] == 'N';
-}
-inline bool PDFCharIsDelimiter(uint8_t c) {
- return PDF_CharType[c] == 'D';
-}
-inline bool PDFCharIsOther(uint8_t c) {
- return PDF_CharType[c] == 'R';
-}
-
-inline bool PDFCharIsLineEnding(uint8_t c) {
- return c == '\r' || c == '\n';
-}
-
template <typename T>
class ScopedSetInsertion {
public:
@@ -84,175 +64,6 @@ class ScopedSetInsertion {
// Indexed by 8-bit char code, contains unicode code points.
extern const FX_WORD PDFDocEncoding[256];
-
-class CPDF_SimpleParser {
- public:
- CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize);
- CPDF_SimpleParser(const CFX_ByteStringC& str);
-
- CFX_ByteStringC GetWord();
-
- // Find the token and its |nParams| parameters from the start of data,
- // and move the current position to the start of those parameters.
- bool FindTagParamFromStart(const CFX_ByteStringC& token, int nParams);
-
- // For testing only.
- FX_DWORD GetCurPos() const { return m_dwCurPos; }
-
- private:
- void ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize);
-
- const uint8_t* m_pData;
- FX_DWORD m_dwSize;
- FX_DWORD m_dwCurPos;
-};
-
-class CPDF_Parser {
- public:
- enum Error {
- SUCCESS = 0,
- FILE_ERROR,
- FORMAT_ERROR,
- PASSWORD_ERROR,
- HANDLER_ERROR
- };
-
- CPDF_Parser();
- ~CPDF_Parser();
-
- Error StartParse(IFX_FileRead* pFile);
- FX_DWORD GetPermissions(FX_BOOL bCheckRevision = FALSE);
-
- void SetPassword(const FX_CHAR* password) { m_Password = password; }
- CFX_ByteString GetPassword() { return m_Password; }
- CPDF_Dictionary* GetTrailer() const { return m_pTrailer; }
- FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; }
- CPDF_Document* GetDocument() const { return m_pDocument; }
-
- FX_DWORD GetRootObjNum();
- FX_DWORD GetInfoObjNum();
- CPDF_Array* GetIDArray();
-
- CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; }
-
- CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList,
- FX_DWORD objnum);
-
- FX_DWORD GetLastObjNum() const;
- bool IsValidObjectNumber(FX_DWORD objnum) const;
- FX_FILESIZE GetObjectPositionOrZero(FX_DWORD objnum) const;
- uint8_t GetObjectType(FX_DWORD objnum) const;
- uint16_t GetObjectGenNum(FX_DWORD objnum) const;
- bool IsVersionUpdated() const { return m_bVersionUpdated; }
- bool IsObjectFreeOrNull(FX_DWORD objnum) const;
- FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm);
- CPDF_CryptoHandler* GetCryptoHandler();
- IFX_FileRead* GetFileAccess() const;
-
- FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const;
- FX_FILESIZE GetObjectSize(FX_DWORD objnum) const;
-
- void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size);
- int GetFileVersion() const { return m_FileVersion; }
- FX_BOOL IsXRefStream() const { return m_bXRefStream; }
-
- CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList,
- FX_FILESIZE pos,
- FX_DWORD objnum);
-
- CPDF_Object* ParseIndirectObjectAtByStrict(
- CPDF_IndirectObjectHolder* pObjList,
- FX_FILESIZE pos,
- FX_DWORD objnum,
- FX_FILESIZE* pResultPos);
-
- Error StartAsyncParse(IFX_FileRead* pFile);
-
- FX_DWORD GetFirstPageNo() const { return m_dwFirstPageNo; }
-
- protected:
- struct ObjectInfo {
- ObjectInfo() : pos(0), type(0), gennum(0) {}
-
- FX_FILESIZE pos;
- uint8_t type;
- uint16_t gennum;
- };
-
- void CloseParser();
- CPDF_Object* ParseDirect(CPDF_Object* pObj);
- FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos);
- FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos);
- bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip);
- FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef);
- CPDF_Dictionary* LoadTrailerV4();
- FX_BOOL RebuildCrossRef();
- Error SetEncryptHandler();
- void ReleaseEncryptHandler();
- FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount);
- FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount);
- FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos);
- Error LoadLinearizedMainXRefTable();
- CPDF_StreamAcc* GetObjectStream(FX_DWORD number);
- FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset);
- void SetEncryptDictionary(CPDF_Dictionary* pDict);
- void ShrinkObjectMap(FX_DWORD size);
-
- CPDF_Document* m_pDocument;
- std::unique_ptr<CPDF_SyntaxParser> m_pSyntax;
- bool m_bOwnFileRead;
- int m_FileVersion;
- CPDF_Dictionary* m_pTrailer;
- CPDF_Dictionary* m_pEncryptDict;
- FX_FILESIZE m_LastXRefOffset;
- FX_BOOL m_bXRefStream;
- std::unique_ptr<IPDF_SecurityHandler> m_pSecurityHandler;
- CFX_ByteString m_bsRecipient;
- CFX_ByteString m_FilePath;
- CFX_ByteString m_Password;
- std::map<FX_DWORD, ObjectInfo> m_ObjectInfo;
- std::set<FX_FILESIZE> m_SortedOffset;
- CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers;
- FX_BOOL m_bVersionUpdated;
- CPDF_Object* m_pLinearized;
- FX_DWORD m_dwFirstPageNo;
- FX_DWORD m_dwXrefStartObjNum;
-
- // A map of object numbers to indirect streams. Map owns the streams.
- std::map<FX_DWORD, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap;
-
- // Mapping of object numbers to offsets. The offsets are relative to the first
- // object in the stream.
- using StreamObjectCache = std::map<FX_DWORD, FX_DWORD>;
-
- // Mapping of streams to their object caches. This is valid as long as the
- // streams in |m_ObjectStreamMap| are valid.
- std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache;
-
- // All indirect object numbers that are being parsed.
- std::set<FX_DWORD> m_ParsingObjNums;
-
- friend class CPDF_DataAvail;
-
- private:
- enum class ParserState {
- kDefault,
- kComment,
- kWhitespace,
- kString,
- kHexString,
- kEscapedString,
- kXref,
- kObjNum,
- kPostObjNum,
- kGenNum,
- kPostGenNum,
- kTrailer,
- kBeginObj,
- kEndObj
- };
-};
-
#define FXCIPHER_NONE 0
#define FXCIPHER_RC4 1
#define FXCIPHER_AES 2