summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Artem Strygin <art-snake@yandex-team.ru> 2018-07-03 19:20:56 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-07-03 19:20:56 +0000
commit: 77f15f7883638a4ced131d74c053af10a5970ce9 (patch)
tree: 3263e0560c9638bca29fa92db1465cf4c25dcf3b
parent: 95b0293a29b235c746db0f01c8462ca89d7a814e (diff)
download: pdfium-77f15f7883638a4ced131d74c053af10a5970ce9.tar.xz
Avoid duplicate data buffering in CPDF_SyntaxParser::ReadStream().
Allow sub-streams created from an IFX_SeekableReadStream to provide stream data without copying memory. The data will reside only in the top-level stream.

For example, for the file http://www.major-landrover.ru/upload/attachments/f/9/f96aab07dab04ae89c8a509ec1ef2b31.pdf (18 MB), the memory usage is reduced by ~13 MB.

Change-Id: I2595c014d0fbe1fdd181cc04965cfd7d901c2d88
Reviewed-on: https://pdfium-review.googlesource.com/35930
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
Reviewed-by: dsinclair <dsinclair@chromium.org>
-rw-r--r-- core/fpdfapi/edit/cpdf_flateencoder.cpp | 2
-rw-r--r-- core/fpdfapi/edit/cpdf_flateencoder.h | 4
-rw-r--r-- core/fpdfapi/parser/cpdf_syntax_parser.cpp | 67
-rw-r--r-- testing/embedder_test.cpp | 4
-rw-r--r-- testing/embedder_test.h | 1
5 files changed, 65 insertions, 13 deletions
diff --git a/core/fpdfapi/edit/cpdf_flateencoder.cpp b/core/fpdfapi/edit/cpdf_flateencoder.cpp
index a290da6dac..573c141ff2 100644
--- a/core/fpdfapi/edit/cpdf_flateencoder.cpp
+++ b/core/fpdfapi/edit/cpdf_flateencoder.cpp
@@ -15,7 +15,7 @@
CPDF_FlateEncoder::CPDF_FlateEncoder(const CPDF_Stream* pStream,
bool bFlateEncode)
- : m_dwSize(0), m_pAcc(pdfium::MakeRetain<CPDF_StreamAcc>(pStream)) {
+ : m_pAcc(pdfium::MakeRetain<CPDF_StreamAcc>(pStream)), m_dwSize(0) {
m_pAcc->LoadAllDataRaw();
bool bHasFilter = pStream && pStream->HasFilter();
diff --git a/core/fpdfapi/edit/cpdf_flateencoder.h b/core/fpdfapi/edit/cpdf_flateencoder.h
index 05633f6814..14ca7cec16 100644
--- a/core/fpdfapi/edit/cpdf_flateencoder.h
+++ b/core/fpdfapi/edit/cpdf_flateencoder.h
@@ -34,14 +34,14 @@ class CPDF_FlateEncoder {
}
private:
+ RetainPtr<CPDF_StreamAcc> m_pAcc;
+
uint32_t m_dwSize;
MaybeOwned<uint8_t, FxFreeDeleter> m_pData;
// Only one of these two pointers is valid at any time.
UnownedPtr<const CPDF_Dictionary> m_pDict;
std::unique_ptr<CPDF_Dictionary> m_pClonedDict;
-
- RetainPtr<CPDF_StreamAcc> m_pAcc;
};
#endif // CORE_FPDFAPI_EDIT_CPDF_FLATEENCODER_H_
diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.cpp b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
index 00eed49300..6edfb24ba2 100644
--- a/core/fpdfapi/parser/cpdf_syntax_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_syntax_parser.cpp
@@ -34,6 +34,37 @@ namespace {
enum class ReadStatus { Normal, Backslash, Octal, FinishOctal, CarriageReturn };
+class ReadableSubStream : public IFX_SeekableReadStream {  // Exposes part_size bytes of pFileRead, starting at part_offset, as a stream of its own.
+ public:
+ ReadableSubStream(const RetainPtr<IFX_SeekableReadStream>& pFileRead,  // Underlying stream; a reference to it is retained.
+ FX_FILESIZE part_offset,  // Position in pFileRead that maps to offset 0 of this sub-stream.
+ FX_FILESIZE part_size)  // Number of bytes reported by GetSize() and readable via ReadBlock().
+ : m_pFileRead(pFileRead),
+ m_PartOffset(part_offset),
+ m_PartSize(part_size) {}
+
+ ~ReadableSubStream() override = default;
+
+ // IFX_SeekableReadStream overrides. |offset| is relative to this sub-stream.
+ bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override {
+ FX_SAFE_FILESIZE safe_end = offset;  // Checked arithmetic: an overflowing |offset| + |size| becomes invalid.
+ safe_end += size;
+ // Reject ranges whose end overflows or lies beyond m_PartSize, so that
+ // m_pFileRead->ReadBlock is never asked for data past this sub-stream.
+ if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_PartSize)
+ return false;  // NOTE(review): a negative |offset| is not rejected by this check; confirm callers never pass one.
+
+ return m_pFileRead->ReadBlock(buffer, m_PartOffset + offset, size);  // Translate to the underlying stream's coordinates.
+ }
+
+ FX_FILESIZE GetSize() override { return m_PartSize; }  // Size of the sub-stream, not of m_pFileRead.
+
+ private:
+ RetainPtr<IFX_SeekableReadStream> m_pFileRead;  // Stream that ReadBlock() forwards to.
+ FX_FILESIZE m_PartOffset;  // Added to every requested offset before forwarding.
+ FX_FILESIZE m_PartSize;  // Upper bound for offset + size in ReadBlock().
+};
+
} // namespace
// static
@@ -577,7 +608,6 @@ std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
ToNextLine();
const FX_FILESIZE streamStartPos = GetPos();
- std::unique_ptr<uint8_t, FxFreeDeleter> pData;
if (len > 0) {
FX_SAFE_FILESIZE pos = GetPos();
pos += len;
@@ -585,12 +615,18 @@ std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
len = -1;
}
+ RetainPtr<IFX_SeekableReadStream> data;
if (len > 0) {
- pData.reset(FX_Alloc(uint8_t, len));
- // We should try read data first to allow the Validator to request data
+ // Check data availability first to allow the Validator to request data
// smoothly, without jumps.
- if (!ReadBlock(pData.get(), len))
+ if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
+ m_HeaderOffset + GetPos(), len)) {
return nullptr;
+ }
+
+ data = pdfium::MakeRetain<ReadableSubStream>(
+ GetValidator(), m_HeaderOffset + GetPos(), len);
+ SetPos(GetPos() + len);
}
const ByteStringView kEndStreamStr("endstream");
@@ -611,7 +647,7 @@ std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
// specified length, it signals the end of stream.
if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
kEndStreamStr.GetLength()) != 0) {
- pData.reset();
+ data.Reset();
len = -1;
SetPos(streamStartPos);
}
@@ -628,14 +664,27 @@ std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
ASSERT(len >= 0);
if (len > 0) {
SetPos(streamStartPos);
- pData.reset(FX_Alloc(uint8_t, len));
- if (!ReadBlock(pData.get(), len))
+ // Check data availability first to allow the Validator to request data
+ // smoothly, without jumps.
+ if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
+ m_HeaderOffset + GetPos(), len)) {
return nullptr;
+ }
+
+ data = pdfium::MakeRetain<ReadableSubStream>(
+ GetValidator(), m_HeaderOffset + GetPos(), len);
+ SetPos(GetPos() + len);
}
}
- auto pStream =
- pdfium::MakeUnique<CPDF_Stream>(std::move(pData), len, std::move(pDict));
+ auto pStream = pdfium::MakeUnique<CPDF_Stream>();
+ if (data) {
+ pStream->InitStreamFromFile(data, std::move(pDict));
+ } else {
+ DCHECK(!len);
+ // Empty stream
+ pStream->InitStream(nullptr, 0, std::move(pDict));
+ }
const FX_FILESIZE end_stream_offset = GetPos();
memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
GetNextWordInternal(nullptr);
diff --git a/testing/embedder_test.cpp b/testing/embedder_test.cpp
index e4ac4ad2c7..e874640218 100644
--- a/testing/embedder_test.cpp
+++ b/testing/embedder_test.cpp
@@ -351,7 +351,9 @@ FPDF_DOCUMENT EmbedderTest::OpenSavedDocument(const char* password) {
memset(&saved_file_access_, 0, sizeof(saved_file_access_));
saved_file_access_.m_FileLen = data_string_.size();
saved_file_access_.m_GetBlock = GetBlockFromString;
- saved_file_access_.m_Param = &data_string_;
+ // Copy data to prevent clearing it before saved document close.
+ saved_document_file_data_ = data_string_;
+ saved_file_access_.m_Param = &saved_document_file_data_;
saved_fake_file_access_ =
pdfium::MakeUnique<FakeFileAccess>(&saved_file_access_);
diff --git a/testing/embedder_test.h b/testing/embedder_test.h
index e8f76c058f..b0dada0341 100644
--- a/testing/embedder_test.h
+++ b/testing/embedder_test.h
@@ -260,6 +260,7 @@ class EmbedderTest : public ::testing::Test,
int GetPageNumberForSavedPage(FPDF_PAGE page) const;
std::string data_string_;
+ std::string saved_document_file_data_;
std::ofstream filestream_;
};