From 834ebece214f06c6e9fda803ab321e8453b3a54b Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Thu, 27 Jul 2017 14:01:32 +0300 Subject: Implement read validator. The wrapper for IFX_SeekableReadStream. Which allow us to check data availability on read request and request downloading of non available data on fly. Change-Id: I27c66cd58f43f8432f73104cc3f4c980515a9b56 Reviewed-on: https://pdfium-review.googlesource.com/9050 Commit-Queue: Art Snake Reviewed-by: (OOO Jul 28 - Aug 8) dsinclair --- BUILD.gn | 3 + core/fpdfapi/parser/cpdf_data_avail.cpp | 44 ++++- core/fpdfapi/parser/cpdf_data_avail.h | 9 +- core/fpdfapi/parser/cpdf_read_validator.cpp | 94 +++++++++++ core/fpdfapi/parser/cpdf_read_validator.h | 48 ++++++ .../parser/cpdf_read_validator_unittest.cpp | 180 +++++++++++++++++++++ fpdfsdk/fpdf_dataavail.cpp | 2 +- samples/pdfium_test.cc | 5 +- 8 files changed, 370 insertions(+), 15 deletions(-) create mode 100644 core/fpdfapi/parser/cpdf_read_validator.cpp create mode 100644 core/fpdfapi/parser/cpdf_read_validator.h create mode 100644 core/fpdfapi/parser/cpdf_read_validator_unittest.cpp diff --git a/BUILD.gn b/BUILD.gn index f2dbc6816f..db87b1631f 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -605,6 +605,8 @@ static_library("fpdfapi") { "core/fpdfapi/parser/cpdf_object.h", "core/fpdfapi/parser/cpdf_parser.cpp", "core/fpdfapi/parser/cpdf_parser.h", + "core/fpdfapi/parser/cpdf_read_validator.cpp", + "core/fpdfapi/parser/cpdf_read_validator.h", "core/fpdfapi/parser/cpdf_reference.cpp", "core/fpdfapi/parser/cpdf_reference.h", "core/fpdfapi/parser/cpdf_security_handler.cpp", @@ -1897,6 +1899,7 @@ test("pdfium_unittests") { "core/fpdfapi/parser/cpdf_document_unittest.cpp", "core/fpdfapi/parser/cpdf_object_unittest.cpp", "core/fpdfapi/parser/cpdf_parser_unittest.cpp", + "core/fpdfapi/parser/cpdf_read_validator_unittest.cpp", "core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp", "core/fpdfapi/parser/cpdf_syntax_parser_unittest.cpp", "core/fpdfapi/parser/fpdf_parser_decode_unittest.cpp", diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 043462c3bb..bc81e991ca 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -18,6 +18,7 @@ #include "core/fpdfapi/parser/cpdf_linearized_header.h" #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_read_validator.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/fpdf_parser_utility.h" @@ -51,6 +52,27 @@ CPDF_Object* GetResourceObject(CPDF_Dictionary* pDict) { return nullptr; } +class HintsAssigner { + public: + HintsAssigner(CPDF_ReadValidator* validator, + CPDF_DataAvail::DownloadHints* hints) + : validator_(validator) { + if (validator_) { + validator_->ResetErrors(); + validator_->SetDownloadHints(hints); + } + } + + ~HintsAssigner() { + if (validator_) { + validator_->SetDownloadHints(nullptr); + } + } + + private: + CFX_UnownedPtr validator_; +}; + } // namespace CPDF_DataAvail::FileAvail::~FileAvail() {} @@ -61,7 +83,11 @@ CPDF_DataAvail::CPDF_DataAvail( FileAvail* pFileAvail, const CFX_RetainPtr& pFileRead, bool bSupportHintTable) - : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) { + : m_pFileAvail(pFileAvail), + m_pFileRead( + pFileRead + ? pdfium::MakeRetain(pFileRead, m_pFileAvail) + : nullptr) { m_Pos = 0; m_dwFileLen = 0; if (m_pFileRead) { @@ -211,6 +237,8 @@ bool CPDF_DataAvail::AreObjectsAvailable(std::vector& obj_array, CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail( DownloadHints* pHints) { + const HintsAssigner hints_assigner(m_pFileRead.Get(), pHints); + if (!m_dwFileLen && m_pFileRead) { m_dwFileLen = (uint32_t)m_pFileRead->GetSize(); if (!m_dwFileLen) @@ -285,7 +313,7 @@ bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) { case PDF_DATAAVAIL_CROSSREF: return CheckCrossRef(pHints); case PDF_DATAAVAIL_CROSSREF_ITEM: - return CheckCrossRefItem(pHints); + return CheckCrossRefItem(); case PDF_DATAAVAIL_CROSSREF_STREAM: return CheckAllCrossRefStream(pHints); case PDF_DATAAVAIL_TRAILER: @@ -955,14 +983,12 @@ bool CPDF_DataAvail::GetNextChar(uint8_t& ch) { return true; } -bool CPDF_DataAvail::CheckCrossRefItem(DownloadHints* pHints) { - int32_t iSize = 0; +bool CPDF_DataAvail::CheckCrossRefItem() { CFX_ByteString token; while (1) { if (!GetNextToken(&token)) { - iSize = static_cast( - m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); - pHints->AddSegment(m_Pos, iSize); + if (!m_pFileRead->has_read_problems()) + m_docStatus = PDF_DATAAVAIL_ERROR; return false; } @@ -1567,6 +1593,10 @@ bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) { return true; } +CFX_RetainPtr CPDF_DataAvail::GetFileRead() const { + return m_pFileRead; +} + int CPDF_DataAvail::GetPageCount() const { if (m_pLinearized) return m_pLinearized->GetPageCount(); diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h index eb45c144c8..f19b36d375 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.h +++ b/core/fpdfapi/parser/cpdf_data_avail.h @@ -21,6 +21,7 @@ class CPDF_HintTables; class CPDF_IndirectObjectHolder; class CPDF_LinearizedHeader; class CPDF_Parser; +class CPDF_ReadValidator; enum PDF_DATAAVAIL_STATUS { PDF_DATAAVAIL_HEADER = 0, @@ -107,9 +108,7 @@ class CPDF_DataAvail final { DocFormStatus IsFormAvail(DownloadHints* pHints); DocLinearizationStatus IsLinearizedPDF(); bool IsLinearized(); - CFX_RetainPtr GetFileRead() const { - return m_pFileRead; - } + CFX_RetainPtr GetFileRead() const; int GetPageCount() const; CPDF_Dictionary* GetPage(int index); @@ -137,7 +136,7 @@ class CPDF_DataAvail final { bool CheckHintTables(DownloadHints* pHints); bool CheckEnd(DownloadHints* pHints); bool CheckCrossRef(DownloadHints* pHints); - bool CheckCrossRefItem(DownloadHints* pHints); + bool CheckCrossRefItem(); bool CheckTrailer(DownloadHints* pHints); bool CheckRoot(DownloadHints* pHints); bool CheckInfo(DownloadHints* pHints); @@ -194,7 +193,7 @@ class CPDF_DataAvail final { bool ValidateForm(); FileAvail* const m_pFileAvail; - CFX_RetainPtr m_pFileRead; + CFX_RetainPtr m_pFileRead; CPDF_Parser m_parser; CPDF_SyntaxParser m_syntaxParser; std::unique_ptr m_pRoot; diff --git a/core/fpdfapi/parser/cpdf_read_validator.cpp b/core/fpdfapi/parser/cpdf_read_validator.cpp new file mode 100644 index 0000000000..148ecfd424 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_read_validator.cpp @@ -0,0 +1,94 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_read_validator.h" + +#include + +#include "third_party/base/logging.h" + +namespace { + +constexpr FX_FILESIZE kAlignBlockValue = 512; + +FX_FILESIZE AlignDown(FX_FILESIZE offset) { + return offset > 0 ? (offset - offset % kAlignBlockValue) : 0; +} + +FX_FILESIZE AlignUp(FX_FILESIZE offset) { + FX_SAFE_FILESIZE safe_result = AlignDown(offset); + safe_result += kAlignBlockValue; + if (safe_result.IsValid()) + return safe_result.ValueOrDie(); + return offset; +} + +} // namespace + +CPDF_ReadValidator::CPDF_ReadValidator( + const CFX_RetainPtr& file_read, + CPDF_DataAvail::FileAvail* file_avail) + : file_read_(file_read), + file_avail_(file_avail), + read_error_(false), + has_unavailable_data_(false) { + ASSERT(file_read_); +} + +CPDF_ReadValidator::~CPDF_ReadValidator() {} + +void CPDF_ReadValidator::ResetErrors() { + read_error_ = false; + has_unavailable_data_ = false; +} + +bool CPDF_ReadValidator::ReadBlock(void* buffer, + FX_FILESIZE offset, + size_t size) { + // correct values checks: + if (!pdfium::base::IsValueInRangeForNumericType(size)) + return false; + + FX_SAFE_FILESIZE end_offset = offset; + end_offset += size; + if (!end_offset.IsValid() || end_offset.ValueOrDie() > GetSize()) + return false; + + if (!file_avail_ || + file_avail_->IsDataAvail(offset, static_cast(size))) { + if (file_read_->ReadBlock(buffer, offset, size)) + return true; + read_error_ = true; + } + has_unavailable_data_ = true; + ScheduleDownload(offset, size); + return false; +} + +FX_FILESIZE CPDF_ReadValidator::GetSize() { + return file_read_->GetSize(); +} + +void CPDF_ReadValidator::ScheduleDownload(FX_FILESIZE offset, size_t size) { + if (!hints_ || size == 0) + return; + + const FX_FILESIZE start_segment_offset = AlignDown(offset); + FX_SAFE_FILESIZE end_segment_offset = offset; + end_segment_offset += size; + if (!end_segment_offset.IsValid()) { + NOTREACHED(); + return; + } + end_segment_offset = + std::min(GetSize(), AlignUp(end_segment_offset.ValueOrDie())); + + FX_SAFE_UINT32 segment_size = end_segment_offset; + segment_size -= start_segment_offset; + if (!segment_size.IsValid()) { + NOTREACHED(); + return; + } + hints_->AddSegment(start_segment_offset, segment_size.ValueOrDie()); +} diff --git a/core/fpdfapi/parser/cpdf_read_validator.h b/core/fpdfapi/parser/cpdf_read_validator.h new file mode 100644 index 0000000000..da8acfe23b --- /dev/null +++ b/core/fpdfapi/parser/cpdf_read_validator.h @@ -0,0 +1,48 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_ +#define CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_ + +#include "core/fpdfapi/parser/cpdf_data_avail.h" + +class CPDF_ReadValidator : public IFX_SeekableReadStream { + public: + template + friend CFX_RetainPtr pdfium::MakeRetain(Args&&... args); + + void SetDownloadHints(CPDF_DataAvail::DownloadHints* hints) { + hints_ = hints; + } + + bool read_error() const { return read_error_; } + bool has_unavailable_data() const { return has_unavailable_data_; } + + bool has_read_problems() const { + return read_error() || has_unavailable_data(); + } + + void ResetErrors(); + + // IFX_SeekableReadStream overrides: + bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override; + FX_FILESIZE GetSize() override; + + private: + CPDF_ReadValidator(const CFX_RetainPtr& file_read, + CPDF_DataAvail::FileAvail* file_avail); + ~CPDF_ReadValidator() override; + + void ScheduleDownload(FX_FILESIZE offset, size_t size); + + CFX_RetainPtr file_read_; + CFX_UnownedPtr file_avail_; + + CFX_UnownedPtr hints_; + + bool read_error_; + bool has_unavailable_data_; +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_ diff --git a/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp b/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp new file mode 100644 index 0000000000..f0e47f552c --- /dev/null +++ b/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp @@ -0,0 +1,180 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_read_validator.h" + +#include +#include +#include + +#include "core/fxcrt/cfx_memorystream.h" +#include "core/fxcrt/fx_stream.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +constexpr uint32_t kTestDataSize = 64 * 1024 - 467; + +std::pair MakeRange(uint32_t start, uint32_t end) { + return std::pair(start, end); +} + +class MockFileAvail : public CPDF_DataAvail::FileAvail { + public: + MockFileAvail() : available_range_(0, 0) {} + ~MockFileAvail() override {} + + bool IsDataAvail(FX_FILESIZE offset, uint32_t size) override { + return available_range_.first <= offset && + available_range_.second >= static_cast(offset + size); + } + + void SetAvailableRange(const std::pair& range) { + available_range_ = range; + } + + void SetAvailableRange(uint32_t start, uint32_t end) { + SetAvailableRange(MakeRange(start, end)); + } + + private: + std::pair available_range_; +}; + +class MockDownloadHints : public CPDF_DataAvail::DownloadHints { + public: + MockDownloadHints() : last_requested_range_(0, 0) {} + ~MockDownloadHints() override {} + + void AddSegment(FX_FILESIZE offset, uint32_t size) override { + last_requested_range_.first = offset; + last_requested_range_.second = offset + size; + } + + const std::pair& GetLastRequstedRange() const { + return last_requested_range_; + } + + void Reset() { last_requested_range_ = MakeRange(0, 0); } + + private: + std::pair last_requested_range_; +}; + +class InvalidReader : public IFX_SeekableReadStream { + public: + template + friend CFX_RetainPtr pdfium::MakeRetain(Args&&... args); + + // IFX_SeekableReadStream overrides: + bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override { + return false; + } + FX_FILESIZE GetSize() override { return kTestDataSize; } + + private: + InvalidReader() {} + ~InvalidReader() override {} +}; + +} // namespace + +TEST(CPDF_ReadValidatorTest, UnavailableData) { + std::vector test_data(kTestDataSize); + auto file = pdfium::MakeRetain(test_data.data(), + test_data.size(), false); + MockFileAvail file_avail; + auto validator = pdfium::MakeRetain(file, &file_avail); + + std::vector read_buffer(100); + EXPECT_FALSE( + validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size())); + + EXPECT_FALSE(validator->read_error()); + EXPECT_TRUE(validator->has_unavailable_data()); + + validator->ResetErrors(); + + file_avail.SetAvailableRange(5000, 5000 + read_buffer.size()); + + EXPECT_TRUE( + validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size())); + EXPECT_FALSE(validator->read_error()); + EXPECT_FALSE(validator->has_unavailable_data()); +} + +TEST(CPDF_ReadValidatorTest, UnavailableDataWithHints) { + std::vector test_data(kTestDataSize); + auto file = pdfium::MakeRetain(test_data.data(), + test_data.size(), false); + MockFileAvail file_avail; + auto validator = pdfium::MakeRetain(file, &file_avail); + + MockDownloadHints hints; + validator->SetDownloadHints(&hints); + + std::vector read_buffer(100); + + EXPECT_FALSE( + validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size())); + EXPECT_FALSE(validator->read_error()); + EXPECT_TRUE(validator->has_unavailable_data()); + + // Requested range should be enlarged and aligned. + EXPECT_EQ(MakeRange(4608, 5120), hints.GetLastRequstedRange()); + + file_avail.SetAvailableRange(hints.GetLastRequstedRange()); + hints.Reset(); + + validator->ResetErrors(); + EXPECT_TRUE( + validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size())); + // No new request on already available data. + EXPECT_EQ(MakeRange(0, 0), hints.GetLastRequstedRange()); + EXPECT_FALSE(validator->read_error()); + EXPECT_FALSE(validator->has_unavailable_data()); + + validator->ResetErrors(); + // Try read unavailable data at file end. + EXPECT_FALSE(validator->ReadBlock(read_buffer.data(), + validator->GetSize() - read_buffer.size(), + read_buffer.size())); + // Should not enlarge request at file end. + EXPECT_EQ(validator->GetSize(), hints.GetLastRequstedRange().second); + EXPECT_FALSE(validator->read_error()); + EXPECT_TRUE(validator->has_unavailable_data()); + + validator->SetDownloadHints(nullptr); +} + +TEST(CPDF_ReadValidatorTest, ReadError) { + auto file = pdfium::MakeRetain(); + auto validator = pdfium::MakeRetain(file, nullptr); + + static const uint32_t kBufferSize = 3 * 1000; + std::vector buffer(kBufferSize); + + EXPECT_FALSE(validator->ReadBlock(buffer.data(), 5000, 100)); + EXPECT_TRUE(validator->read_error()); + EXPECT_TRUE(validator->has_unavailable_data()); +} + +TEST(CPDF_ReadValidatorTest, IntOverflow) { + std::vector test_data(kTestDataSize); + auto file = pdfium::MakeRetain(test_data.data(), + test_data.size(), false); + MockFileAvail file_avail; + auto validator = pdfium::MakeRetain(file, &file_avail); + + std::vector read_buffer(100); + + // If we have int overflow, this is equal reading after file end. This is not + // read_error, and in this case we have not unavailable data. It is just error + // of input params. + EXPECT_FALSE(validator->ReadBlock(read_buffer.data(), + std::numeric_limits::max() - 1, + read_buffer.size())); + EXPECT_FALSE(validator->read_error()); + EXPECT_FALSE(validator->has_unavailable_data()); +} diff --git a/fpdfsdk/fpdf_dataavail.cpp b/fpdfsdk/fpdf_dataavail.cpp index 2511441af5..85dd873cf3 100644 --- a/fpdfsdk/fpdf_dataavail.cpp +++ b/fpdfsdk/fpdf_dataavail.cpp @@ -105,9 +105,9 @@ class CFPDF_DataAvail { m_FileRead(pdfium::MakeRetain()) {} ~CFPDF_DataAvail() {} - std::unique_ptr m_pDataAvail; std::unique_ptr m_FileAvail; CFX_RetainPtr m_FileRead; + std::unique_ptr m_pDataAvail; }; CFPDF_DataAvail* CFPDFDataAvailFromFPDFAvail(FPDF_AVAIL avail) { diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc index bb969b1c8d..ee95e2f3c1 100644 --- a/samples/pdfium_test.cc +++ b/samples/pdfium_test.cc @@ -1256,6 +1256,8 @@ void RenderPdf(const std::string& name, platform_callbacks.Doc_gotoPage = ExampleDocGotoPage; platform_callbacks.Doc_mail = ExampleDocMail; + // The pdf_avail must outlive doc. + std::unique_ptr pdf_avail; // The document must outlive |form_callbacks.loaded_pages|. std::unique_ptr doc; FPDF_FORMFILLINFO_PDFiumTest form_callbacks = {}; @@ -1283,8 +1285,7 @@ void RenderPdf(const std::string& name, int nRet = PDF_DATA_NOTAVAIL; bool bIsLinearized = false; - std::unique_ptr pdf_avail( - FPDFAvail_Create(&file_avail, &file_access)); + pdf_avail.reset(FPDFAvail_Create(&file_avail, &file_access)); if (FPDFAvail_IsLinearized(pdf_avail.get()) == PDF_LINEARIZED) { doc.reset(FPDFAvail_GetDocument(pdf_avail.get(), nullptr)); -- cgit v1.2.3