summary refs log tree commit diff
path: root/core/include/fpdfapi/cpdf_parser.h
diff options
context:
space:
mode:
Diffstat (limited to 'core/include/fpdfapi/cpdf_parser.h')
-rw-r--r-- core/include/fpdfapi/cpdf_parser.h 173
1 files changed, 173 insertions, 0 deletions
diff --git a/core/include/fpdfapi/cpdf_parser.h b/core/include/fpdfapi/cpdf_parser.h
new file mode 100644
index 0000000000..80a1107743
--- /dev/null
+++ b/core/include/fpdfapi/cpdf_parser.h
@@ -0,0 +1,173 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_
+#define CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_
+
+#include <map>
+#include <memory>
+#include <set>
+
+#include "core/include/fxcrt/fx_basic.h"
+
+class CPDF_Array;
+class CPDF_CryptoHandler;
+class CPDF_Dictionary;
+class CPDF_Document;
+class CPDF_IndirectObjectHolder;
+class CPDF_Object;
+class CPDF_StreamAcc;
+class CPDF_SyntaxParser;
+class IFX_FileRead;
+class IPDF_SecurityHandler;
+
+class CPDF_Parser {  // PDF parser: xref tables, trailer, indirect objects.
+ public:
+ enum Error {  // Parse status; returned by StartParse()/StartAsyncParse().
+ SUCCESS = 0,
+ FILE_ERROR,
+ FORMAT_ERROR,
+ PASSWORD_ERROR,
+ HANDLER_ERROR
+ };
+
+ CPDF_Parser();
+ ~CPDF_Parser();
+
+ Error StartParse(IFX_FileRead* pFile);
+ FX_DWORD GetPermissions(FX_BOOL bCheckRevision = FALSE);
+
+ void SetPassword(const FX_CHAR* password) { m_Password = password; }
+ CFX_ByteString GetPassword() const { return m_Password; }  // By value.
+ CPDF_Dictionary* GetTrailer() const { return m_pTrailer; }
+ FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; }
+ CPDF_Document* GetDocument() const { return m_pDocument; }
+
+ FX_DWORD GetRootObjNum();
+ FX_DWORD GetInfoObjNum();
+ CPDF_Array* GetIDArray();
+
+ CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; }
+
+ CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList,
+ FX_DWORD objnum);
+
+ FX_DWORD GetLastObjNum() const;
+ bool IsValidObjectNumber(FX_DWORD objnum) const;
+ FX_FILESIZE GetObjectPositionOrZero(FX_DWORD objnum) const;
+ uint8_t GetObjectType(FX_DWORD objnum) const;
+ uint16_t GetObjectGenNum(FX_DWORD objnum) const;
+ bool IsVersionUpdated() const { return m_bVersionUpdated; }
+ bool IsObjectFreeOrNull(FX_DWORD objnum) const;
+ FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm);
+ CPDF_CryptoHandler* GetCryptoHandler();
+ IFX_FileRead* GetFileAccess() const;
+
+ FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const;
+ FX_FILESIZE GetObjectSize(FX_DWORD objnum) const;
+
+ void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size);
+ int GetFileVersion() const { return m_FileVersion; }
+ FX_BOOL IsXRefStream() const { return m_bXRefStream; }
+
+ CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList,
+ FX_FILESIZE pos,
+ FX_DWORD objnum);
+
+ CPDF_Object* ParseIndirectObjectAtByStrict(
+ CPDF_IndirectObjectHolder* pObjList,
+ FX_FILESIZE pos,
+ FX_DWORD objnum,
+ FX_FILESIZE* pResultPos);
+
+ Error StartAsyncParse(IFX_FileRead* pFile);
+
+ FX_DWORD GetFirstPageNo() const { return m_dwFirstPageNo; }
+
+ protected:  // Also reachable by CPDF_DataAvail (declared a friend below).
+ struct ObjectInfo {  // Value type of the m_ObjectInfo map.
+ ObjectInfo() : pos(0), type(0), gennum(0) {}  // Zeroes every field.
+
+ FX_FILESIZE pos;
+ uint8_t type;
+ uint16_t gennum;
+ };
+
+ void CloseParser();
+ CPDF_Object* ParseDirect(CPDF_Object* pObj);
+ FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos);
+ FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos);
+ bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip);
+ FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef);
+ CPDF_Dictionary* LoadTrailerV4();
+ FX_BOOL RebuildCrossRef();
+ Error SetEncryptHandler();
+ void ReleaseEncryptHandler();
+ FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount);
+ FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount);
+ FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos);
+ Error LoadLinearizedMainXRefTable();
+ CPDF_StreamAcc* GetObjectStream(FX_DWORD number);
+ FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset);
+ void SetEncryptDictionary(CPDF_Dictionary* pDict);
+ void ShrinkObjectMap(FX_DWORD size);
+
+ CPDF_Document* m_pDocument;
+ std::unique_ptr<CPDF_SyntaxParser> m_pSyntax;
+ bool m_bOwnFileRead;
+ int m_FileVersion;
+ CPDF_Dictionary* m_pTrailer;
+ CPDF_Dictionary* m_pEncryptDict;
+ FX_FILESIZE m_LastXRefOffset;
+ FX_BOOL m_bXRefStream;
+ std::unique_ptr<IPDF_SecurityHandler> m_pSecurityHandler;
+ CFX_ByteString m_bsRecipient;
+ CFX_ByteString m_FilePath;
+ CFX_ByteString m_Password;
+ std::map<FX_DWORD, ObjectInfo> m_ObjectInfo;  // Keyed by object number.
+ std::set<FX_FILESIZE> m_SortedOffset;
+ CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers;
+ FX_BOOL m_bVersionUpdated;
+ CPDF_Object* m_pLinearized;
+ FX_DWORD m_dwFirstPageNo;
+ FX_DWORD m_dwXrefStartObjNum;
+
+ // A map of object numbers to indirect streams. Map owns the streams.
+ std::map<FX_DWORD, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap;
+
+ // Mapping of object numbers to offsets. The offsets are relative to the first
+ // object in the stream.
+ using StreamObjectCache = std::map<FX_DWORD, FX_DWORD>;
+
+ // Mapping of streams to their object caches. This is valid as long as the
+ // streams in |m_ObjectStreamMap| are valid.
+ std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache;
+
+ // All indirect object numbers that are being parsed.
+ std::set<FX_DWORD> m_ParsingObjNums;
+
+ friend class CPDF_DataAvail;  // Grants access to the non-public members.
+
+ private:
+ enum class ParserState {  // Not referenced elsewhere in this header.
+ kDefault,
+ kComment,
+ kWhitespace,
+ kString,
+ kHexString,
+ kEscapedString,
+ kXref,
+ kObjNum,
+ kPostObjNum,
+ kGenNum,
+ kPostGenNum,
+ kTrailer,
+ kBeginObj,
+ kEndObj
+ };
+};
+
+#endif // CORE_INCLUDE_FPDFAPI_CPDF_PARSER_H_