diff options
author | Artem Strygin <art-snake@yandex-team.ru> | 2018-07-03 19:20:56 +0000 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2018-07-03 19:20:56 +0000 |
commit | 77f15f7883638a4ced131d74c053af10a5970ce9 (patch) | |
tree | 3263e0560c9638bca29fa92db1465cf4c25dcf3b | |
parent | 95b0293a29b235c746db0f01c8462ca89d7a814e (diff) | |
download | pdfium-77f15f7883638a4ced131d74c053af10a5970ce9.tar.xz |
Avoid duplicate data buffering in CPDF_SyntaxParser::ReadStream().
Allow sub-streams created from an IFX_SeekableReadStream to provide
stream data without copying memory.
The data will only reside in the top-level stream.
For example:
For file
http://www.major-landrover.ru/upload/attachments/f/9/f96aab07dab04ae89c8a509ec1ef2b31.pdf
(18 Mb)
The memory usage is reduced by ~13 Mb.
Change-Id: I2595c014d0fbe1fdd181cc04965cfd7d901c2d88
Reviewed-on: https://pdfium-review.googlesource.com/35930
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
Reviewed-by: dsinclair <dsinclair@chromium.org>
-rw-r--r-- | core/fpdfapi/edit/cpdf_flateencoder.cpp | 2 | ||||
-rw-r--r-- | core/fpdfapi/edit/cpdf_flateencoder.h | 4 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_syntax_parser.cpp | 67 | ||||
-rw-r--r-- | testing/embedder_test.cpp | 4 | ||||
-rw-r--r-- | testing/embedder_test.h | 1 |
5 files changed, 65 insertions, 13 deletions
diff --git a/core/fpdfapi/edit/cpdf_flateencoder.cpp b/core/fpdfapi/edit/cpdf_flateencoder.cpp index a290da6dac..573c141ff2 100644 --- a/core/fpdfapi/edit/cpdf_flateencoder.cpp +++ b/core/fpdfapi/edit/cpdf_flateencoder.cpp @@ -15,7 +15,7 @@ CPDF_FlateEncoder::CPDF_FlateEncoder(const CPDF_Stream* pStream, bool bFlateEncode) - : m_dwSize(0), m_pAcc(pdfium::MakeRetain<CPDF_StreamAcc>(pStream)) { + : m_pAcc(pdfium::MakeRetain<CPDF_StreamAcc>(pStream)), m_dwSize(0) { m_pAcc->LoadAllDataRaw(); bool bHasFilter = pStream && pStream->HasFilter(); diff --git a/core/fpdfapi/edit/cpdf_flateencoder.h b/core/fpdfapi/edit/cpdf_flateencoder.h index 05633f6814..14ca7cec16 100644 --- a/core/fpdfapi/edit/cpdf_flateencoder.h +++ b/core/fpdfapi/edit/cpdf_flateencoder.h @@ -34,14 +34,14 @@ class CPDF_FlateEncoder { } private: + RetainPtr<CPDF_StreamAcc> m_pAcc; + uint32_t m_dwSize; MaybeOwned<uint8_t, FxFreeDeleter> m_pData; // Only one of these two pointers is valid at any time. UnownedPtr<const CPDF_Dictionary> m_pDict; std::unique_ptr<CPDF_Dictionary> m_pClonedDict; - - RetainPtr<CPDF_StreamAcc> m_pAcc; }; #endif // CORE_FPDFAPI_EDIT_CPDF_FLATEENCODER_H_ diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.cpp b/core/fpdfapi/parser/cpdf_syntax_parser.cpp index 00eed49300..6edfb24ba2 100644 --- a/core/fpdfapi/parser/cpdf_syntax_parser.cpp +++ b/core/fpdfapi/parser/cpdf_syntax_parser.cpp @@ -34,6 +34,37 @@ namespace { enum class ReadStatus { Normal, Backslash, Octal, FinishOctal, CarriageReturn }; +class ReadableSubStream : public IFX_SeekableReadStream { + public: + ReadableSubStream(const RetainPtr<IFX_SeekableReadStream>& pFileRead, + FX_FILESIZE part_offset, + FX_FILESIZE part_size) + : m_pFileRead(pFileRead), + m_PartOffset(part_offset), + m_PartSize(part_size) {} + + ~ReadableSubStream() override = default; + + // IFX_SeekableReadStream overrides: + bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override { + FX_SAFE_FILESIZE safe_end = offset; + safe_end += size; + // Check that requested range is valid, to prevent calling of ReadBlock + // of original m_pFileRead with incorrect params. + if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_PartSize) + return false; + + return m_pFileRead->ReadBlock(buffer, m_PartOffset + offset, size); + } + + FX_FILESIZE GetSize() override { return m_PartSize; } + + private: + RetainPtr<IFX_SeekableReadStream> m_pFileRead; + FX_FILESIZE m_PartOffset; + FX_FILESIZE m_PartSize; +}; + } // namespace // static @@ -577,7 +608,6 @@ std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream( ToNextLine(); const FX_FILESIZE streamStartPos = GetPos(); - std::unique_ptr<uint8_t, FxFreeDeleter> pData; if (len > 0) { FX_SAFE_FILESIZE pos = GetPos(); pos += len; @@ -585,12 +615,18 @@ std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream( len = -1; } + RetainPtr<IFX_SeekableReadStream> data; if (len > 0) { - pData.reset(FX_Alloc(uint8_t, len)); - // We should try read data first to allow the Validator to request data + // Check data availability first to allow the Validator to request data // smoothly, without jumps. - if (!ReadBlock(pData.get(), len)) + if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable( + m_HeaderOffset + GetPos(), len)) { return nullptr; + } + + data = pdfium::MakeRetain<ReadableSubStream>( + GetValidator(), m_HeaderOffset + GetPos(), len); + SetPos(GetPos() + len); } const ByteStringView kEndStreamStr("endstream"); @@ -611,7 +647,7 @@ std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream( // specified length, it signals the end of stream. if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(), kEndStreamStr.GetLength()) != 0) { - pData.reset(); + data.Reset(); len = -1; SetPos(streamStartPos); } @@ -628,14 +664,27 @@ std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream( ASSERT(len >= 0); if (len > 0) { SetPos(streamStartPos); - pData.reset(FX_Alloc(uint8_t, len)); - if (!ReadBlock(pData.get(), len)) + // Check data availability first to allow the Validator to request data + // smoothly, without jumps. + if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable( + m_HeaderOffset + GetPos(), len)) { return nullptr; + } + + data = pdfium::MakeRetain<ReadableSubStream>( + GetValidator(), m_HeaderOffset + GetPos(), len); + SetPos(GetPos() + len); } } - auto pStream = - pdfium::MakeUnique<CPDF_Stream>(std::move(pData), len, std::move(pDict)); + auto pStream = pdfium::MakeUnique<CPDF_Stream>(); + if (data) { + pStream->InitStreamFromFile(data, std::move(pDict)); + } else { + DCHECK(!len); + // Empty stream + pStream->InitStream(nullptr, 0, std::move(pDict)); + } const FX_FILESIZE end_stream_offset = GetPos(); memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); GetNextWordInternal(nullptr); diff --git a/testing/embedder_test.cpp b/testing/embedder_test.cpp index e4ac4ad2c7..e874640218 100644 --- a/testing/embedder_test.cpp +++ b/testing/embedder_test.cpp @@ -351,7 +351,9 @@ FPDF_DOCUMENT EmbedderTest::OpenSavedDocument(const char* password) { memset(&saved_file_access_, 0, sizeof(saved_file_access_)); saved_file_access_.m_FileLen = data_string_.size(); saved_file_access_.m_GetBlock = GetBlockFromString; - saved_file_access_.m_Param = &data_string_; + // Copy data to prevent clearing it before saved document close. + saved_document_file_data_ = data_string_; + saved_file_access_.m_Param = &saved_document_file_data_; saved_fake_file_access_ = pdfium::MakeUnique<FakeFileAccess>(&saved_file_access_); diff --git a/testing/embedder_test.h b/testing/embedder_test.h index e8f76c058f..b0dada0341 100644 --- a/testing/embedder_test.h +++ b/testing/embedder_test.h @@ -260,6 +260,7 @@ class EmbedderTest : public ::testing::Test, int GetPageNumberForSavedPage(FPDF_PAGE page) const; std::string data_string_; + std::string saved_document_file_data_; std::ofstream filestream_; }; |