From 5b322338073162afab98bb28c920692c73b995ed Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Wed, 25 Jul 2018 17:35:18 +0000 Subject: Move CPDF_SyntaxParser init methods into ctor. - CPDF_SyntaxParser can no longer be initialized multiple times. - Make the file length and header offset const. - Make the header offset type FX_FILESIZE consistently. - Simplify for the common case where the header offset is 0. Change-Id: I7138db1fbcec3b7578b0239b92fc1154fa4dc4ce Reviewed-on: https://pdfium-review.googlesource.com/38850 Reviewed-by: Art Snake Commit-Queue: Lei Zhang --- core/fpdfapi/parser/cfdf_document.cpp | 3 +- .../parser/cpdf_cross_ref_avail_unittest.cpp | 6 +-- core/fpdfapi/parser/cpdf_data_avail.cpp | 3 +- core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp | 12 ++--- core/fpdfapi/parser/cpdf_object_stream.cpp | 7 +-- core/fpdfapi/parser/cpdf_parser.cpp | 8 +-- core/fpdfapi/parser/cpdf_parser.h | 10 ++-- core/fpdfapi/parser/cpdf_parser_unittest.cpp | 6 +-- core/fpdfapi/parser/cpdf_syntax_parser.cpp | 46 ++++++++-------- core/fpdfapi/parser/cpdf_syntax_parser.h | 27 +++++----- .../fpdfapi/parser/cpdf_syntax_parser_unittest.cpp | 61 +++++++++++----------- 11 files changed, 88 insertions(+), 101 deletions(-) (limited to 'core/fpdfapi/parser') diff --git a/core/fpdfapi/parser/cfdf_document.cpp b/core/fpdfapi/parser/cfdf_document.cpp index f6af4546d7..bfade94af6 100644 --- a/core/fpdfapi/parser/cfdf_document.cpp +++ b/core/fpdfapi/parser/cfdf_document.cpp @@ -39,8 +39,7 @@ std::unique_ptr CFDF_Document::ParseMemory(uint8_t* pData, void CFDF_Document::ParseStream( const RetainPtr& pFile) { m_pFile = pFile; - CPDF_SyntaxParser parser; - parser.InitParser(m_pFile, 0); + CPDF_SyntaxParser parser(m_pFile); while (1) { bool bNumber; ByteString word = parser.GetNextWord(&bNumber); diff --git a/core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp b/core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp index e158bafd3e..4ee6b59ad5 100644 --- a/core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp @@ -17,10 +17,8 @@ namespace { std::unique_ptr MakeParserForBuffer( const unsigned char* buffer, size_t buffer_size) { - auto parser = pdfium::MakeUnique(); - parser->InitParser( - pdfium::MakeRetain(buffer, buffer_size), 0); - return parser; + return pdfium::MakeUnique( + pdfium::MakeRetain(buffer, buffer_size)); } } // namespace diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 9cfe51dd68..577b7e7a18 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -506,7 +506,8 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() { if (header_offset == kInvalidHeaderOffset) return DocAvailStatus::DataError; - m_parser.m_pSyntax->InitParserWithValidator(GetValidator(), header_offset); + m_parser.m_pSyntax = + pdfium::MakeUnique(GetValidator(), header_offset); m_pLinearized = m_parser.ParseLinearizedHeader(); if (GetValidator()->has_read_problems()) return DocAvailStatus::DataNotAvailable; diff --git a/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp b/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp index 8a7331d29b..34c1694c59 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp @@ -48,12 +48,9 @@ class TestLinearizedHeader : public CPDF_LinearizedHeader { static std::unique_ptr MakeHeader( const std::string& inline_data) { - CPDF_SyntaxParser parser; - parser.InitParser( - pdfium::MakeRetain( - reinterpret_cast(inline_data.data()), - inline_data.size()), - 0); + CPDF_SyntaxParser parser(pdfium::MakeRetain( + reinterpret_cast(inline_data.data()), + inline_data.size())); std::unique_ptr dict = ToDictionary(parser.GetObjectBody(nullptr)); ASSERT(dict); @@ -161,8 +158,7 @@ TEST_F(CPDF_HintTablesTest, FirstPageOffset) { // This hint table is extracted from linearized file, generated by qpdf tool. RetainPtr validator = MakeValidatorFromFile("hint_table_102p.bin"); - CPDF_SyntaxParser parser; - parser.InitParserWithValidator(validator, 0); + CPDF_SyntaxParser parser(validator, 0); std::unique_ptr stream = ToStream(parser.GetObjectBody(nullptr)); ASSERT_TRUE(stream); auto hint_tables = pdfium::MakeUnique( diff --git a/core/fpdfapi/parser/cpdf_object_stream.cpp b/core/fpdfapi/parser/cpdf_object_stream.cpp index 530b774a63..a85cbf1ffa 100644 --- a/core/fpdfapi/parser/cpdf_object_stream.cpp +++ b/core/fpdfapi/parser/cpdf_object_stream.cpp @@ -101,9 +101,7 @@ void CPDF_ObjectStream::Init(const CPDF_Stream* stream) { true); } - CPDF_SyntaxParser syntax; - syntax.InitParser(data_stream_, 0); - + CPDF_SyntaxParser syntax(data_stream_); const int object_count = stream->GetDict()->GetIntegerFor("N"); for (int32_t i = object_count; i > 0; --i) { if (syntax.GetPos() >= data_stream_->GetSize()) @@ -130,8 +128,7 @@ std::unique_ptr CPDF_ObjectStream::ParseObjectAtOffset( if (offset_in_stream.ValueOrDie() >= data_stream_->GetSize()) return nullptr; - CPDF_SyntaxParser syntax; - syntax.InitParser(data_stream_, 0); + CPDF_SyntaxParser syntax(data_stream_); syntax.SetPos(offset_in_stream.ValueOrDie()); return syntax.GetObjectBody(pObjList); } diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index be18560ffc..ba8974a0de 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -57,11 +57,7 @@ class ObjectsHolderStub : public CPDF_Parser::ParsedObjectsHolder { } // namespace CPDF_Parser::CPDF_Parser(ParsedObjectsHolder* holder) - : m_pSyntax(pdfium::MakeUnique()), - m_pObjectsHolder(holder), - m_bHasParsed(false), - m_bXRefStream(false), - m_FileVersion(0), + : m_pObjectsHolder(holder), m_CrossRefTable(pdfium::MakeUnique()) { if (!holder) { m_pOwnedObjectsHolder = pdfium::MakeUnique(); @@ -131,7 +127,7 @@ bool CPDF_Parser::InitSyntaxParser( if (validator->GetSize() < header_offset + kPDFHeaderSize) return false; - m_pSyntax->InitParserWithValidator(validator, header_offset); + m_pSyntax = pdfium::MakeUnique(validator, header_offset); return ParseFileVersion(); } diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h index 7cd98c2c26..c07c932cf4 100644 --- a/core/fpdfapi/parser/cpdf_parser.h +++ b/core/fpdfapi/parser/cpdf_parser.h @@ -116,11 +116,11 @@ class CPDF_Parser { using ObjectType = CPDF_CrossRefTable::ObjectType; using ObjectInfo = CPDF_CrossRefTable::ObjectInfo; - std::unique_ptr m_pSyntax; - bool LoadCrossRefV4(FX_FILESIZE pos, bool bSkip); bool RebuildCrossRef(); + std::unique_ptr m_pSyntax; + private: friend class cpdf_parser_ParseStartXRefWithHeaderOffset_Test; friend class cpdf_parser_ParseStartXRef_Test; @@ -169,9 +169,9 @@ class CPDF_Parser { std::unique_ptr m_pOwnedObjectsHolder; UnownedPtr m_pObjectsHolder; - bool m_bHasParsed; - bool m_bXRefStream; - int m_FileVersion; + bool m_bHasParsed = false; + bool m_bXRefStream = false; + int m_FileVersion = 0; // m_CrossRefTable must be destroyed after m_pSecurityHandler due to the // ownership of the ID array data. std::unique_ptr m_CrossRefTable; diff --git a/core/fpdfapi/parser/cpdf_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_parser_unittest.cpp index 4ce1238ee3..2e7e06066b 100644 --- a/core/fpdfapi/parser/cpdf_parser_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_parser_unittest.cpp @@ -42,15 +42,15 @@ class CPDF_TestParser : public CPDF_Parser { return false; // For the test file, the header is set at the beginning. - m_pSyntax->InitParser(pFileAccess, 0); + m_pSyntax = pdfium::MakeUnique(pFileAccess); return true; } // Setup reading from a buffer and initial states. bool InitTestFromBufferWithOffset(const unsigned char* buffer, size_t len, - int header_offset) { - m_pSyntax->InitParser( + FX_FILESIZE header_offset) { + m_pSyntax = CPDF_SyntaxParser::CreateForTesting( pdfium::MakeRetain(buffer, len), header_offset); return true; diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.cpp b/core/fpdfapi/parser/cpdf_syntax_parser.cpp index af5ff488db..3acad525f7 100644 --- a/core/fpdfapi/parser/cpdf_syntax_parser.cpp +++ b/core/fpdfapi/parser/cpdf_syntax_parser.cpp @@ -70,7 +70,29 @@ class ReadableSubStream : public IFX_SeekableReadStream { // static int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; -CPDF_SyntaxParser::CPDF_SyntaxParser() = default; +// static +std::unique_ptr CPDF_SyntaxParser::CreateForTesting( + const RetainPtr& pFileAccess, + FX_FILESIZE HeaderOffset) { + return pdfium::MakeUnique( + pdfium::MakeRetain(pFileAccess, nullptr), + HeaderOffset); +} + +CPDF_SyntaxParser::CPDF_SyntaxParser( + const RetainPtr& pFileAccess) + : CPDF_SyntaxParser( + pdfium::MakeRetain(pFileAccess, nullptr), + 0) {} + +CPDF_SyntaxParser::CPDF_SyntaxParser( + const RetainPtr& validator, + FX_FILESIZE HeaderOffset) + : m_pFileAccess(validator), + m_HeaderOffset(HeaderOffset), + m_FileLen(m_pFileAccess->GetSize()) { + ASSERT(m_HeaderOffset <= m_FileLen); +} CPDF_SyntaxParser::~CPDF_SyntaxParser() = default; @@ -702,28 +724,6 @@ std::unique_ptr CPDF_SyntaxParser::ReadStream( return pStream; } -void CPDF_SyntaxParser::InitParser( - const RetainPtr& pFileAccess, - uint32_t HeaderOffset) { - ASSERT(pFileAccess); - return InitParserWithValidator( - pdfium::MakeRetain(pFileAccess, nullptr), - HeaderOffset); -} - -void CPDF_SyntaxParser::InitParserWithValidator( - const RetainPtr& validator, - uint32_t HeaderOffset) { - ASSERT(validator); - m_pFileBuf.clear(); - m_HeaderOffset = HeaderOffset; - m_FileLen = validator->GetSize(); - ASSERT(m_HeaderOffset <= m_FileLen); - m_Pos = 0; - m_pFileAccess = validator; - m_BufOffset = 0; -} - uint32_t CPDF_SyntaxParser::GetDirectNum() { bool bIsNumber; GetNextWordInternal(&bIsNumber); diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.h b/core/fpdfapi/parser/cpdf_syntax_parser.h index ebd844c3e0..92f28d9874 100644 --- a/core/fpdfapi/parser/cpdf_syntax_parser.h +++ b/core/fpdfapi/parser/cpdf_syntax_parser.h @@ -27,15 +27,16 @@ class CPDF_SyntaxParser { public: enum class ParseType { kStrict, kLoose }; - CPDF_SyntaxParser(); + static std::unique_ptr CreateForTesting( + const RetainPtr& pFileAccess, + FX_FILESIZE HeaderOffset); + + explicit CPDF_SyntaxParser( + const RetainPtr& pFileAccess); + CPDF_SyntaxParser(const RetainPtr& pValidator, + FX_FILESIZE HeaderOffset); ~CPDF_SyntaxParser(); - void InitParser(const RetainPtr& pFileAccess, - uint32_t HeaderOffset); - - void InitParserWithValidator(const RetainPtr& pValidator, - uint32_t HeaderOffset); - void SetReadBufferSize(uint32_t read_buffer_size) { m_ReadBufferSize = read_buffer_size; } @@ -104,17 +105,17 @@ class CPDF_SyntaxParser { CPDF_IndirectObjectHolder* pObjList, ParseType parse_type); - FX_FILESIZE m_Pos; + RetainPtr m_pFileAccess; // The syntax parser use position relative to header offset. // The header contains at file start, and can follow after some stuff. We // ignore this stuff. - FX_FILESIZE m_HeaderOffset; - FX_FILESIZE m_FileLen; + const FX_FILESIZE m_HeaderOffset; + const FX_FILESIZE m_FileLen; + FX_FILESIZE m_Pos = 0; WeakPtr m_pPool; std::vector m_pFileBuf; - RetainPtr m_pFileAccess; - FX_FILESIZE m_BufOffset; - uint32_t m_WordSize; + FX_FILESIZE m_BufOffset = 0; + uint32_t m_WordSize = 0; uint8_t m_WordBuffer[257]; uint32_t m_ReadBufferSize = CPDF_ModuleMgr::kFileBufSize; }; diff --git a/core/fpdfapi/parser/cpdf_syntax_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_syntax_parser_unittest.cpp index cb065eb4e2..57d26b1ed1 100644 --- a/core/fpdfapi/parser/cpdf_syntax_parser_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_syntax_parser_unittest.cpp @@ -18,8 +18,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Empty string. uint8_t data[] = ""; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 0, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 0, false)); EXPECT_EQ("", parser.ReadHexString()); EXPECT_EQ(0, parser.GetPos()); } @@ -27,8 +27,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Blank string. uint8_t data[] = " "; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 2, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 2, false)); EXPECT_EQ("", parser.ReadHexString()); EXPECT_EQ(2, parser.GetPos()); } @@ -36,8 +36,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Skips unknown characters. uint8_t data[] = "z12b"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 4, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 4, false)); EXPECT_EQ("\x12\xb0", parser.ReadHexString()); EXPECT_EQ(4, parser.GetPos()); } @@ -45,8 +45,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Skips unknown characters. uint8_t data[] = "*<&*#$^&@1"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 10, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 10, false)); EXPECT_EQ("\x10", parser.ReadHexString()); EXPECT_EQ(10, parser.GetPos()); } @@ -54,8 +54,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Skips unknown characters. uint8_t data[] = "\x80zab"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 4, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 4, false)); EXPECT_EQ("\xab", parser.ReadHexString()); EXPECT_EQ(4, parser.GetPos()); } @@ -63,8 +63,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Skips unknown characters. uint8_t data[] = "\xffzab"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 4, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 4, false)); EXPECT_EQ("\xab", parser.ReadHexString()); EXPECT_EQ(4, parser.GetPos()); } @@ -72,8 +72,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Regular conversion. uint8_t data[] = "1A2b>abcd"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 9, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 9, false)); EXPECT_EQ("\x1a\x2b", parser.ReadHexString()); EXPECT_EQ(5, parser.GetPos()); } @@ -81,8 +81,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Position out of bounds. uint8_t data[] = "12ab>"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 5, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 5, false)); parser.SetPos(5); EXPECT_EQ("", parser.ReadHexString()); @@ -103,8 +103,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Missing ending >. uint8_t data[] = "1A2b"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 4, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 4, false)); EXPECT_EQ("\x1a\x2b", parser.ReadHexString()); EXPECT_EQ(4, parser.GetPos()); } @@ -112,8 +112,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Missing ending >. uint8_t data[] = "12abz"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 5, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 5, false)); EXPECT_EQ("\x12\xab", parser.ReadHexString()); EXPECT_EQ(5, parser.GetPos()); } @@ -121,8 +121,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Uneven number of bytes. uint8_t data[] = "1A2>asdf"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 8, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 8, false)); EXPECT_EQ("\x1a\x20", parser.ReadHexString()); EXPECT_EQ(4, parser.GetPos()); } @@ -130,8 +130,8 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Uneven number of bytes. uint8_t data[] = "1A2zasdf"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 8, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 8, false)); EXPECT_EQ("\x1a\x2a\xdf", parser.ReadHexString()); EXPECT_EQ(8, parser.GetPos()); } @@ -139,27 +139,26 @@ TEST(cpdf_syntax_parser, ReadHexString) { { // Just ending character. uint8_t data[] = ">"; - CPDF_SyntaxParser parser; - parser.InitParser(pdfium::MakeRetain(data, 1, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 1, false)); EXPECT_EQ("", parser.ReadHexString()); EXPECT_EQ(1, parser.GetPos()); } } TEST(cpdf_syntax_parser, GetInvalidReference) { - CPDF_SyntaxParser parser; // Data with a reference with number CPDF_Object::kInvalidObjNum uint8_t data[] = "4294967295 0 R"; - parser.InitParser(pdfium::MakeRetain(data, 14, false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, 14, false)); std::unique_ptr ref = parser.GetObjectBody(nullptr); EXPECT_FALSE(ref); } TEST(cpdf_syntax_parser, PeekNextWord) { - CPDF_SyntaxParser parser; uint8_t data[] = " WORD "; - parser.InitParser( - pdfium::MakeRetain(data, sizeof(data), false), 0); + CPDF_SyntaxParser parser( + pdfium::MakeRetain(data, sizeof(data), false)); EXPECT_EQ("WORD", parser.PeekNextWord(nullptr)); EXPECT_EQ("WORD", parser.GetNextWord(nullptr)); } -- cgit v1.2.3