diff options
author | Artem Strygin <art-snake@yandex-team.ru> | 2017-07-27 14:01:32 +0300 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-07-28 00:03:54 +0000 |
commit | 834ebece214f06c6e9fda803ab321e8453b3a54b (patch) | |
tree | 259c4aead897943a48c99016d17cd63a04de5d12 /core | |
parent | 672a1721620c3f4e62fe6adfaceb929d423ae31f (diff) | |
download | pdfium-834ebece214f06c6e9fda803ab321e8453b3a54b.tar.xz |
Implement read validator.
The wrapper for IFX_SeekableReadStream.
Which allow us to check data availability on read request
and request downloading of non available data on fly.
Change-Id: I27c66cd58f43f8432f73104cc3f4c980515a9b56
Reviewed-on: https://pdfium-review.googlesource.com/9050
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
Reviewed-by: (OOO Jul 28 - Aug 8) dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'core')
-rw-r--r-- | core/fpdfapi/parser/cpdf_data_avail.cpp | 44 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_data_avail.h | 9 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_read_validator.cpp | 94 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_read_validator.h | 48 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_read_validator_unittest.cpp | 180 |
5 files changed, 363 insertions, 12 deletions
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 043462c3bb..bc81e991ca 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -18,6 +18,7 @@ #include "core/fpdfapi/parser/cpdf_linearized_header.h" #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_read_validator.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/fpdf_parser_utility.h" @@ -51,6 +52,27 @@ CPDF_Object* GetResourceObject(CPDF_Dictionary* pDict) { return nullptr; } +class HintsAssigner { + public: + HintsAssigner(CPDF_ReadValidator* validator, + CPDF_DataAvail::DownloadHints* hints) + : validator_(validator) { + if (validator_) { + validator_->ResetErrors(); + validator_->SetDownloadHints(hints); + } + } + + ~HintsAssigner() { + if (validator_) { + validator_->SetDownloadHints(nullptr); + } + } + + private: + CFX_UnownedPtr<CPDF_ReadValidator> validator_; +}; + } // namespace CPDF_DataAvail::FileAvail::~FileAvail() {} @@ -61,7 +83,11 @@ CPDF_DataAvail::CPDF_DataAvail( FileAvail* pFileAvail, const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead, bool bSupportHintTable) - : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) { + : m_pFileAvail(pFileAvail), + m_pFileRead( + pFileRead + ? pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, m_pFileAvail) + : nullptr) { m_Pos = 0; m_dwFileLen = 0; if (m_pFileRead) { @@ -211,6 +237,8 @@ bool CPDF_DataAvail::AreObjectsAvailable(std::vector<CPDF_Object*>& obj_array, CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail( DownloadHints* pHints) { + const HintsAssigner hints_assigner(m_pFileRead.Get(), pHints); + if (!m_dwFileLen && m_pFileRead) { m_dwFileLen = (uint32_t)m_pFileRead->GetSize(); if (!m_dwFileLen) @@ -285,7 +313,7 @@ bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) { case PDF_DATAAVAIL_CROSSREF: return CheckCrossRef(pHints); case PDF_DATAAVAIL_CROSSREF_ITEM: - return CheckCrossRefItem(pHints); + return CheckCrossRefItem(); case PDF_DATAAVAIL_CROSSREF_STREAM: return CheckAllCrossRefStream(pHints); case PDF_DATAAVAIL_TRAILER: @@ -955,14 +983,12 @@ bool CPDF_DataAvail::GetNextChar(uint8_t& ch) { return true; } -bool CPDF_DataAvail::CheckCrossRefItem(DownloadHints* pHints) { - int32_t iSize = 0; +bool CPDF_DataAvail::CheckCrossRefItem() { CFX_ByteString token; while (1) { if (!GetNextToken(&token)) { - iSize = static_cast<int32_t>( - m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); - pHints->AddSegment(m_Pos, iSize); + if (!m_pFileRead->has_read_problems()) + m_docStatus = PDF_DATAAVAIL_ERROR; return false; } @@ -1567,6 +1593,10 @@ bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) { return true; } +CFX_RetainPtr<IFX_SeekableReadStream> CPDF_DataAvail::GetFileRead() const { + return m_pFileRead; +} + int CPDF_DataAvail::GetPageCount() const { if (m_pLinearized) return m_pLinearized->GetPageCount(); diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h index eb45c144c8..f19b36d375 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.h +++ b/core/fpdfapi/parser/cpdf_data_avail.h @@ -21,6 +21,7 @@ class CPDF_HintTables; class CPDF_IndirectObjectHolder; class CPDF_LinearizedHeader; class CPDF_Parser; +class CPDF_ReadValidator; enum PDF_DATAAVAIL_STATUS { PDF_DATAAVAIL_HEADER = 0, @@ -107,9 +108,7 @@ class CPDF_DataAvail final { DocFormStatus IsFormAvail(DownloadHints* pHints); DocLinearizationStatus IsLinearizedPDF(); bool IsLinearized(); - CFX_RetainPtr<IFX_SeekableReadStream> GetFileRead() const { - return m_pFileRead; - } + CFX_RetainPtr<IFX_SeekableReadStream> GetFileRead() const; int GetPageCount() const; CPDF_Dictionary* GetPage(int index); @@ -137,7 +136,7 @@ class CPDF_DataAvail final { bool CheckHintTables(DownloadHints* pHints); bool CheckEnd(DownloadHints* pHints); bool CheckCrossRef(DownloadHints* pHints); - bool CheckCrossRefItem(DownloadHints* pHints); + bool CheckCrossRefItem(); bool CheckTrailer(DownloadHints* pHints); bool CheckRoot(DownloadHints* pHints); bool CheckInfo(DownloadHints* pHints); @@ -194,7 +193,7 @@ class CPDF_DataAvail final { bool ValidateForm(); FileAvail* const m_pFileAvail; - CFX_RetainPtr<IFX_SeekableReadStream> m_pFileRead; + CFX_RetainPtr<CPDF_ReadValidator> m_pFileRead; CPDF_Parser m_parser; CPDF_SyntaxParser m_syntaxParser; std::unique_ptr<CPDF_Object> m_pRoot; diff --git a/core/fpdfapi/parser/cpdf_read_validator.cpp b/core/fpdfapi/parser/cpdf_read_validator.cpp new file mode 100644 index 0000000000..148ecfd424 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_read_validator.cpp @@ -0,0 +1,94 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_read_validator.h" + +#include <algorithm> + +#include "third_party/base/logging.h" + +namespace { + +constexpr FX_FILESIZE kAlignBlockValue = 512; + +FX_FILESIZE AlignDown(FX_FILESIZE offset) { + return offset > 0 ? (offset - offset % kAlignBlockValue) : 0; +} + +FX_FILESIZE AlignUp(FX_FILESIZE offset) { + FX_SAFE_FILESIZE safe_result = AlignDown(offset); + safe_result += kAlignBlockValue; + if (safe_result.IsValid()) + return safe_result.ValueOrDie(); + return offset; +} + +} // namespace + +CPDF_ReadValidator::CPDF_ReadValidator( + const CFX_RetainPtr<IFX_SeekableReadStream>& file_read, + CPDF_DataAvail::FileAvail* file_avail) + : file_read_(file_read), + file_avail_(file_avail), + read_error_(false), + has_unavailable_data_(false) { + ASSERT(file_read_); +} + +CPDF_ReadValidator::~CPDF_ReadValidator() {} + +void CPDF_ReadValidator::ResetErrors() { + read_error_ = false; + has_unavailable_data_ = false; +} + +bool CPDF_ReadValidator::ReadBlock(void* buffer, + FX_FILESIZE offset, + size_t size) { + // correct values checks: + if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(size)) + return false; + + FX_SAFE_FILESIZE end_offset = offset; + end_offset += size; + if (!end_offset.IsValid() || end_offset.ValueOrDie() > GetSize()) + return false; + + if (!file_avail_ || + file_avail_->IsDataAvail(offset, static_cast<uint32_t>(size))) { + if (file_read_->ReadBlock(buffer, offset, size)) + return true; + read_error_ = true; + } + has_unavailable_data_ = true; + ScheduleDownload(offset, size); + return false; +} + +FX_FILESIZE CPDF_ReadValidator::GetSize() { + return file_read_->GetSize(); +} + +void CPDF_ReadValidator::ScheduleDownload(FX_FILESIZE offset, size_t size) { + if (!hints_ || size == 0) + return; + + const FX_FILESIZE start_segment_offset = AlignDown(offset); + FX_SAFE_FILESIZE end_segment_offset = offset; + end_segment_offset += size; + if (!end_segment_offset.IsValid()) { + NOTREACHED(); + return; + } + end_segment_offset = + std::min(GetSize(), AlignUp(end_segment_offset.ValueOrDie())); + + FX_SAFE_UINT32 segment_size = end_segment_offset; + segment_size -= start_segment_offset; + if (!segment_size.IsValid()) { + NOTREACHED(); + return; + } + hints_->AddSegment(start_segment_offset, segment_size.ValueOrDie()); +} diff --git a/core/fpdfapi/parser/cpdf_read_validator.h b/core/fpdfapi/parser/cpdf_read_validator.h new file mode 100644 index 0000000000..da8acfe23b --- /dev/null +++ b/core/fpdfapi/parser/cpdf_read_validator.h @@ -0,0 +1,48 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_ +#define CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_ + +#include "core/fpdfapi/parser/cpdf_data_avail.h" + +class CPDF_ReadValidator : public IFX_SeekableReadStream { + public: + template <typename T, typename... Args> + friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args); + + void SetDownloadHints(CPDF_DataAvail::DownloadHints* hints) { + hints_ = hints; + } + + bool read_error() const { return read_error_; } + bool has_unavailable_data() const { return has_unavailable_data_; } + + bool has_read_problems() const { + return read_error() || has_unavailable_data(); + } + + void ResetErrors(); + + // IFX_SeekableReadStream overrides: + bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override; + FX_FILESIZE GetSize() override; + + private: + CPDF_ReadValidator(const CFX_RetainPtr<IFX_SeekableReadStream>& file_read, + CPDF_DataAvail::FileAvail* file_avail); + ~CPDF_ReadValidator() override; + + void ScheduleDownload(FX_FILESIZE offset, size_t size); + + CFX_RetainPtr<IFX_SeekableReadStream> file_read_; + CFX_UnownedPtr<CPDF_DataAvail::FileAvail> file_avail_; + + CFX_UnownedPtr<CPDF_DataAvail::DownloadHints> hints_; + + bool read_error_; + bool has_unavailable_data_; +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_ diff --git a/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp b/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp new file mode 100644 index 0000000000..f0e47f552c --- /dev/null +++ b/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp @@ -0,0 +1,180 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_read_validator.h" + +#include <limits> +#include <utility> +#include <vector> + +#include "core/fxcrt/cfx_memorystream.h" +#include "core/fxcrt/fx_stream.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +constexpr uint32_t kTestDataSize = 64 * 1024 - 467; + +std::pair<FX_FILESIZE, FX_FILESIZE> MakeRange(uint32_t start, uint32_t end) { + return std::pair<FX_FILESIZE, FX_FILESIZE>(start, end); +} + +class MockFileAvail : public CPDF_DataAvail::FileAvail { + public: + MockFileAvail() : available_range_(0, 0) {} + ~MockFileAvail() override {} + + bool IsDataAvail(FX_FILESIZE offset, uint32_t size) override { + return available_range_.first <= offset && + available_range_.second >= static_cast<FX_FILESIZE>(offset + size); + } + + void SetAvailableRange(const std::pair<FX_FILESIZE, FX_FILESIZE>& range) { + available_range_ = range; + } + + void SetAvailableRange(uint32_t start, uint32_t end) { + SetAvailableRange(MakeRange(start, end)); + } + + private: + std::pair<FX_FILESIZE, FX_FILESIZE> available_range_; +}; + +class MockDownloadHints : public CPDF_DataAvail::DownloadHints { + public: + MockDownloadHints() : last_requested_range_(0, 0) {} + ~MockDownloadHints() override {} + + void AddSegment(FX_FILESIZE offset, uint32_t size) override { + last_requested_range_.first = offset; + last_requested_range_.second = offset + size; + } + + const std::pair<FX_FILESIZE, FX_FILESIZE>& GetLastRequstedRange() const { + return last_requested_range_; + } + + void Reset() { last_requested_range_ = MakeRange(0, 0); } + + private: + std::pair<FX_FILESIZE, FX_FILESIZE> last_requested_range_; +}; + +class InvalidReader : public IFX_SeekableReadStream { + public: + template <typename T, typename... Args> + friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args); + + // IFX_SeekableReadStream overrides: + bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override { + return false; + } + FX_FILESIZE GetSize() override { return kTestDataSize; } + + private: + InvalidReader() {} + ~InvalidReader() override {} +}; + +} // namespace + +TEST(CPDF_ReadValidatorTest, UnavailableData) { + std::vector<uint8_t> test_data(kTestDataSize); + auto file = pdfium::MakeRetain<CFX_MemoryStream>(test_data.data(), + test_data.size(), false); + MockFileAvail file_avail; + auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, &file_avail); + + std::vector<uint8_t> read_buffer(100); + EXPECT_FALSE( + validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size())); + + EXPECT_FALSE(validator->read_error()); + EXPECT_TRUE(validator->has_unavailable_data()); + + validator->ResetErrors(); + + file_avail.SetAvailableRange(5000, 5000 + read_buffer.size()); + + EXPECT_TRUE( + validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size())); + EXPECT_FALSE(validator->read_error()); + EXPECT_FALSE(validator->has_unavailable_data()); +} + +TEST(CPDF_ReadValidatorTest, UnavailableDataWithHints) { + std::vector<uint8_t> test_data(kTestDataSize); + auto file = pdfium::MakeRetain<CFX_MemoryStream>(test_data.data(), + test_data.size(), false); + MockFileAvail file_avail; + auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, &file_avail); + + MockDownloadHints hints; + validator->SetDownloadHints(&hints); + + std::vector<uint8_t> read_buffer(100); + + EXPECT_FALSE( + validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size())); + EXPECT_FALSE(validator->read_error()); + EXPECT_TRUE(validator->has_unavailable_data()); + + // Requested range should be enlarged and aligned. + EXPECT_EQ(MakeRange(4608, 5120), hints.GetLastRequstedRange()); + + file_avail.SetAvailableRange(hints.GetLastRequstedRange()); + hints.Reset(); + + validator->ResetErrors(); + EXPECT_TRUE( + validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size())); + // No new request on already available data. + EXPECT_EQ(MakeRange(0, 0), hints.GetLastRequstedRange()); + EXPECT_FALSE(validator->read_error()); + EXPECT_FALSE(validator->has_unavailable_data()); + + validator->ResetErrors(); + // Try read unavailable data at file end. + EXPECT_FALSE(validator->ReadBlock(read_buffer.data(), + validator->GetSize() - read_buffer.size(), + read_buffer.size())); + // Should not enlarge request at file end. + EXPECT_EQ(validator->GetSize(), hints.GetLastRequstedRange().second); + EXPECT_FALSE(validator->read_error()); + EXPECT_TRUE(validator->has_unavailable_data()); + + validator->SetDownloadHints(nullptr); +} + +TEST(CPDF_ReadValidatorTest, ReadError) { + auto file = pdfium::MakeRetain<InvalidReader>(); + auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, nullptr); + + static const uint32_t kBufferSize = 3 * 1000; + std::vector<uint8_t> buffer(kBufferSize); + + EXPECT_FALSE(validator->ReadBlock(buffer.data(), 5000, 100)); + EXPECT_TRUE(validator->read_error()); + EXPECT_TRUE(validator->has_unavailable_data()); +} + +TEST(CPDF_ReadValidatorTest, IntOverflow) { + std::vector<uint8_t> test_data(kTestDataSize); + auto file = pdfium::MakeRetain<CFX_MemoryStream>(test_data.data(), + test_data.size(), false); + MockFileAvail file_avail; + auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, &file_avail); + + std::vector<uint8_t> read_buffer(100); + + // If we have int overflow, this is equal reading after file end. This is not + // read_error, and in this case we have not unavailable data. It is just error + // of input params. + EXPECT_FALSE(validator->ReadBlock(read_buffer.data(), + std::numeric_limits<FX_FILESIZE>::max() - 1, + read_buffer.size())); + EXPECT_FALSE(validator->read_error()); + EXPECT_FALSE(validator->has_unavailable_data()); +} |