diff options
-rw-r--r-- | core/fpdfapi/edit/cpdf_creator.cpp | 14 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_linearized_header.cpp | 17 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser.cpp | 11 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser.h | 6 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser_unittest.cpp | 71 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_syntax_parser.cpp | 9 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_syntax_parser.h | 19 |
7 files changed, 110 insertions, 37 deletions
diff --git a/core/fpdfapi/edit/cpdf_creator.cpp b/core/fpdfapi/edit/cpdf_creator.cpp index aa8c751046..11b0628b01 100644 --- a/core/fpdfapi/edit/cpdf_creator.cpp +++ b/core/fpdfapi/edit/cpdf_creator.cpp @@ -17,6 +17,7 @@ #include "core/fpdfapi/parser/cpdf_parser.h" #include "core/fpdfapi/parser/cpdf_security_handler.h" #include "core/fpdfapi/parser/cpdf_string.h" +#include "core/fpdfapi/parser/cpdf_syntax_parser.h" #include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fpdfapi/parser/fpdf_parser_utility.h" #include "core/fxcrt/fx_extension.h" @@ -254,20 +255,19 @@ CPDF_Creator::Stage CPDF_Creator::WriteDoc_Stage1() { } m_iStage = Stage::kInitWriteObjs20; } else { - m_SavedOffset = m_pParser->GetFileAccess()->GetSize(); + m_SavedOffset = m_pParser->GetSyntax()->GetDocumentSize(); m_iStage = Stage::kWriteIncremental15; } } if (m_iStage == Stage::kWriteIncremental15) { if (m_IsOriginal && m_SavedOffset > 0) { - RetainPtr<IFX_SeekableReadStream> pSrcFile = m_pParser->GetFileAccess(); - std::vector<uint8_t> buffer(4096); + static constexpr FX_FILESIZE kBufferSize = 4096; + std::vector<uint8_t> buffer(kBufferSize); FX_FILESIZE src_size = m_SavedOffset; + m_pParser->GetSyntax()->SetPos(0); while (src_size) { - uint32_t block_size = src_size > 4096 ? 4096 : src_size; - if (!pSrcFile->ReadBlock(buffer.data(), - m_Archive->CurrentOffset() - src_size, - block_size)) { + const FX_FILESIZE block_size = std::min(kBufferSize, src_size); + if (!m_pParser->GetSyntax()->ReadBlock(buffer.data(), block_size)) { return Stage::kInvalid; } if (!m_Archive->WriteBlock(buffer.data(), block_size)) diff --git a/core/fpdfapi/parser/cpdf_linearized_header.cpp b/core/fpdfapi/parser/cpdf_linearized_header.cpp index 5032bc3807..8286e0fbe3 100644 --- a/core/fpdfapi/parser/cpdf_linearized_header.cpp +++ b/core/fpdfapi/parser/cpdf_linearized_header.cpp @@ -38,17 +38,17 @@ bool IsValidNumericDictionaryValue(const CPDF_Dictionary* pDict, } bool IsLinearizedHeaderValid(const CPDF_LinearizedHeader* header, - FX_FILESIZE file_size) { + FX_FILESIZE document_size) { ASSERT(header); - return header->GetFileSize() == file_size && + return header->GetFileSize() == document_size && static_cast<int>(header->GetFirstPageNo()) >= 0 && header->GetFirstPageNo() < kMaxInt && header->GetFirstPageNo() < header->GetPageCount() && - header->GetMainXRefTableFirstEntryOffset() < file_size && + header->GetMainXRefTableFirstEntryOffset() < document_size && header->GetPageCount() > 0 && - header->GetFirstPageEndOffset() < file_size && - header->GetLastXRefOffset() < file_size && - header->GetHintStart() < file_size; + header->GetFirstPageEndOffset() < document_size && + header->GetLastXRefOffset() < document_size && + header->GetHintStart() < document_size; } } // namespace @@ -78,10 +78,9 @@ std::unique_ptr<CPDF_LinearizedHeader> CPDF_LinearizedHeader::Parse( auto result = pdfium::WrapUnique( new CPDF_LinearizedHeader(pDict.get(), parser->GetPos())); - if (!IsLinearizedHeaderValid(result.get(), - parser->GetFileAccess()->GetSize())) { + if (!IsLinearizedHeaderValid(result.get(), parser->GetDocumentSize())) return nullptr; - } + return result; } diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index 16077fd39e..be18560ffc 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -119,10 +119,6 @@ bool CPDF_Parser::IsObjectFree(uint32_t objnum) const { return GetObjectType(objnum) == ObjectType::kFree; } -RetainPtr<IFX_SeekableReadStream> CPDF_Parser::GetFileAccess() const { - return m_pSyntax->GetFileAccess(); -} - void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) { m_CrossRefTable->ShrinkObjectMap(objnum); } @@ -230,8 +226,7 @@ CPDF_Parser::Error CPDF_Parser::StartParseInternal() { FX_FILESIZE CPDF_Parser::ParseStartXRef() { static constexpr char kStartXRefKeyword[] = "startxref"; - m_pSyntax->SetPos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - - strlen(kStartXRefKeyword)); + m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword)); if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096)) return 0; @@ -245,7 +240,7 @@ FX_FILESIZE CPDF_Parser::ParseStartXRef() { return 0; const FX_SAFE_FILESIZE result = FXSYS_atoi64(xrefpos_str.c_str()); - if (!result.IsValid() || result.ValueOrDie() >= GetFileAccess()->GetSize()) + if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize()) return 0; return result.ValueOrDie(); @@ -453,7 +448,7 @@ bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData( return false; const size_t max_entries_in_file = - m_pSyntax->GetFileAccess()->GetSize() / kEntryConstSize; + m_pSyntax->GetDocumentSize() / kEntryConstSize; if (new_size.ValueOrDie() > max_entries_in_file) return false; diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h index 0af39d7529..7cd98c2c26 100644 --- a/core/fpdfapi/parser/cpdf_parser.h +++ b/core/fpdfapi/parser/cpdf_parser.h @@ -90,7 +90,6 @@ class CPDF_Parser { CPDF_SecurityHandler* GetSecurityHandler() const { return m_pSecurityHandler.get(); } - RetainPtr<IFX_SeekableReadStream> GetFileAccess() const; bool IsObjectFree(uint32_t objnum) const; int GetFileVersion() const { return m_FileVersion; } @@ -109,6 +108,8 @@ class CPDF_Parser { return m_CrossRefTable.get(); } + CPDF_SyntaxParser* GetSyntax() const { return m_pSyntax.get(); } + void SetLinearizedHeader(std::unique_ptr<CPDF_LinearizedHeader> pLinearized); protected: @@ -121,6 +122,9 @@ class CPDF_Parser { bool RebuildCrossRef(); private: + friend class cpdf_parser_ParseStartXRefWithHeaderOffset_Test; + friend class cpdf_parser_ParseStartXRef_Test; + friend class cpdf_parser_ParseLinearizedWithHeaderOffset_Test; friend class CPDF_DataAvail; struct CrossRefObjData { diff --git a/core/fpdfapi/parser/cpdf_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_parser_unittest.cpp index db48493c2f..4ce1238ee3 100644 --- a/core/fpdfapi/parser/cpdf_parser_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_parser_unittest.cpp @@ -3,8 +3,12 @@ // found in the LICENSE file. #include <limits> +#include <memory> #include <string> +#include <vector> +#include "core/fpdfapi/parser/cpdf_linearized_header.h" +#include "core/fpdfapi/parser/cpdf_object.h" #include "core/fpdfapi/parser/cpdf_parser.h" #include "core/fpdfapi/parser/cpdf_syntax_parser.h" #include "core/fxcrt/fx_extension.h" @@ -43,13 +47,19 @@ class CPDF_TestParser : public CPDF_Parser { } // Setup reading from a buffer and initial states. - bool InitTestFromBuffer(const unsigned char* buffer, size_t len) { - // For the test file, the header is set at the beginning. + bool InitTestFromBufferWithOffset(const unsigned char* buffer, + size_t len, + int header_offset) { m_pSyntax->InitParser( - pdfium::MakeRetain<CFX_BufferSeekableReadStream>(buffer, len), 0); + pdfium::MakeRetain<CFX_BufferSeekableReadStream>(buffer, len), + header_offset); return true; } + bool InitTestFromBuffer(const unsigned char* buffer, size_t len) { + return InitTestFromBufferWithOffset(buffer, len, 0 /*header_offset*/); + } + private: // Add test cases here as private friend so that protected members in // CPDF_Parser can be accessed by test cases. @@ -226,3 +236,58 @@ TEST(cpdf_parser, LoadCrossRefV4) { } } } + +TEST(cpdf_parser, ParseStartXRef) { + CPDF_TestParser parser; + std::string test_file; + ASSERT_TRUE( + PathService::GetTestFilePath("annotation_stamp_with_ap.pdf", &test_file)); + ASSERT_TRUE(parser.InitTestFromFile(test_file.c_str())) << test_file; + + EXPECT_EQ(100940, parser.ParseStartXRef()); + std::unique_ptr<CPDF_Object> cross_ref_v5_obj = + parser.ParseIndirectObjectAt(100940, 0); + ASSERT_TRUE(cross_ref_v5_obj); + EXPECT_EQ(75u, cross_ref_v5_obj->GetObjNum()); +} + +TEST(cpdf_parser, ParseStartXRefWithHeaderOffset) { + static constexpr FX_FILESIZE kTestHeaderOffset = 765; + std::string test_file; + ASSERT_TRUE( + PathService::GetTestFilePath("annotation_stamp_with_ap.pdf", &test_file)); + RetainPtr<IFX_SeekableReadStream> pFileAccess = + IFX_SeekableReadStream::CreateFromFilename(test_file.c_str()); + ASSERT_TRUE(pFileAccess); + + std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset); + ASSERT_TRUE(pFileAccess->ReadBlock(&data.front() + kTestHeaderOffset, 0, + pFileAccess->GetSize())); + CPDF_TestParser parser; + parser.InitTestFromBufferWithOffset(&data.front(), data.size(), + kTestHeaderOffset); + + EXPECT_EQ(100940, parser.ParseStartXRef()); + std::unique_ptr<CPDF_Object> cross_ref_v5_obj = + parser.ParseIndirectObjectAt(100940, 0); + ASSERT_TRUE(cross_ref_v5_obj); + EXPECT_EQ(75u, cross_ref_v5_obj->GetObjNum()); +} + +TEST(cpdf_parser, ParseLinearizedWithHeaderOffset) { + static constexpr FX_FILESIZE kTestHeaderOffset = 765; + std::string test_file; + ASSERT_TRUE(PathService::GetTestFilePath("linearized.pdf", &test_file)); + RetainPtr<IFX_SeekableReadStream> pFileAccess = + IFX_SeekableReadStream::CreateFromFilename(test_file.c_str()); + ASSERT_TRUE(pFileAccess); + + std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset); + ASSERT_TRUE(pFileAccess->ReadBlock(&data.front() + kTestHeaderOffset, 0, + pFileAccess->GetSize())); + CPDF_TestParser parser; + parser.InitTestFromBufferWithOffset(&data.front(), data.size(), + kTestHeaderOffset); + + EXPECT_TRUE(parser.ParseLinearizedHeader()); +} diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.cpp b/core/fpdfapi/parser/cpdf_syntax_parser.cpp index f8f36aee11..af5ff488db 100644 --- a/core/fpdfapi/parser/cpdf_syntax_parser.cpp +++ b/core/fpdfapi/parser/cpdf_syntax_parser.cpp @@ -112,6 +112,10 @@ bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { return true; } +FX_FILESIZE CPDF_SyntaxParser::GetDocumentSize() const { + return m_FileLen - m_HeaderOffset; +} + bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) { pos += m_HeaderOffset; if (pos >= m_FileLen) @@ -714,6 +718,7 @@ void CPDF_SyntaxParser::InitParserWithValidator( m_pFileBuf.clear(); m_HeaderOffset = HeaderOffset; m_FileLen = validator->GetSize(); + ASSERT(m_HeaderOffset <= m_FileLen); m_Pos = 0; m_pFileAccess = validator; m_BufOffset = 0; @@ -813,10 +818,6 @@ FX_FILESIZE CPDF_SyntaxParser::FindTag(const ByteStringView& tag) { return -1; } -RetainPtr<IFX_SeekableReadStream> CPDF_SyntaxParser::GetFileAccess() const { - return m_pFileAccess; -} - bool CPDF_SyntaxParser::IsPositionRead(FX_FILESIZE pos) const { return m_BufOffset <= pos && pos < static_cast<FX_FILESIZE>(m_BufOffset + m_pFileBuf.size()); diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.h b/core/fpdfapi/parser/cpdf_syntax_parser.h index 962c32753d..ebd844c3e0 100644 --- a/core/fpdfapi/parser/cpdf_syntax_parser.h +++ b/core/fpdfapi/parser/cpdf_syntax_parser.h @@ -60,16 +60,24 @@ class CPDF_SyntaxParser { ByteString GetNextWord(bool* bIsNumber); ByteString PeekNextWord(bool* bIsNumber); - RetainPtr<IFX_SeekableReadStream> GetFileAccess() const; - const RetainPtr<CPDF_ReadValidator>& GetValidator() const { return m_pFileAccess; } uint32_t GetDirectNum(); bool GetNextChar(uint8_t& ch); + // The document size may be smaller than the file size. + // The syntax parser use position relative to document + // offset (|m_HeaderOffset|). + // The document size will be FileSize - "Header offset". + // All offsets was readed from document, should not be great than document + // size. Use it for checks instead of real file size. + FX_FILESIZE GetDocumentSize() const; + + ByteString ReadString(); + ByteString ReadHexString(); + private: - friend class CPDF_Parser; friend class CPDF_DataAvail; friend class cpdf_syntax_parser_ReadHexString_Test; @@ -84,8 +92,6 @@ class CPDF_SyntaxParser { const ByteStringView& tag, bool checkKeyword); - ByteString ReadString(); - ByteString ReadHexString(); unsigned int ReadEOLMarkers(FX_FILESIZE pos); FX_FILESIZE FindWordPos(const ByteStringView& word); FX_FILESIZE FindStreamEndPos(); @@ -99,6 +105,9 @@ class CPDF_SyntaxParser { ParseType parse_type); FX_FILESIZE m_Pos; + // The syntax parser use position relative to header offset. + // The header contains at file start, and can follow after some stuff. We + // ignore this stuff. FX_FILESIZE m_HeaderOffset; FX_FILESIZE m_FileLen; WeakPtr<ByteStringPool> m_pPool; |