summaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorArtem Strygin <art-snake@yandex-team.ru>2017-07-27 14:01:32 +0300
committerChromium commit bot <commit-bot@chromium.org>2017-07-28 00:03:54 +0000
commit834ebece214f06c6e9fda803ab321e8453b3a54b (patch)
tree259c4aead897943a48c99016d17cd63a04de5d12 /core
parent672a1721620c3f4e62fe6adfaceb929d423ae31f (diff)
downloadpdfium-834ebece214f06c6e9fda803ab321e8453b3a54b.tar.xz
Implement read validator.
A wrapper for IFX_SeekableReadStream that allows us to check data availability on each read request and to request the download of unavailable data on the fly. Change-Id: I27c66cd58f43f8432f73104cc3f4c980515a9b56 Reviewed-on: https://pdfium-review.googlesource.com/9050 Commit-Queue: Art Snake <art-snake@yandex-team.ru> Reviewed-by: (OOO Jul 28 - Aug 8) dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'core')
-rw-r--r--core/fpdfapi/parser/cpdf_data_avail.cpp44
-rw-r--r--core/fpdfapi/parser/cpdf_data_avail.h9
-rw-r--r--core/fpdfapi/parser/cpdf_read_validator.cpp94
-rw-r--r--core/fpdfapi/parser/cpdf_read_validator.h48
-rw-r--r--core/fpdfapi/parser/cpdf_read_validator_unittest.cpp180
5 files changed, 363 insertions, 12 deletions
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp
index 043462c3bb..bc81e991ca 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.cpp
+++ b/core/fpdfapi/parser/cpdf_data_avail.cpp
@@ -18,6 +18,7 @@
#include "core/fpdfapi/parser/cpdf_linearized_header.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_read_validator.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
@@ -51,6 +52,27 @@ CPDF_Object* GetResourceObject(CPDF_Dictionary* pDict) {
return nullptr;
}
+class HintsAssigner {
+ public:
+ HintsAssigner(CPDF_ReadValidator* validator,
+ CPDF_DataAvail::DownloadHints* hints)
+ : validator_(validator) {
+ if (validator_) {
+ validator_->ResetErrors();
+ validator_->SetDownloadHints(hints);
+ }
+ }
+
+ ~HintsAssigner() {
+ if (validator_) {
+ validator_->SetDownloadHints(nullptr);
+ }
+ }
+
+ private:
+ CFX_UnownedPtr<CPDF_ReadValidator> validator_;
+};
+
} // namespace
CPDF_DataAvail::FileAvail::~FileAvail() {}
@@ -61,7 +83,11 @@ CPDF_DataAvail::CPDF_DataAvail(
FileAvail* pFileAvail,
const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead,
bool bSupportHintTable)
- : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {
+ : m_pFileAvail(pFileAvail),
+ m_pFileRead(
+ pFileRead
+ ? pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, m_pFileAvail)
+ : nullptr) {
m_Pos = 0;
m_dwFileLen = 0;
if (m_pFileRead) {
@@ -211,6 +237,8 @@ bool CPDF_DataAvail::AreObjectsAvailable(std::vector<CPDF_Object*>& obj_array,
CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
DownloadHints* pHints) {
+ const HintsAssigner hints_assigner(m_pFileRead.Get(), pHints);
+
if (!m_dwFileLen && m_pFileRead) {
m_dwFileLen = (uint32_t)m_pFileRead->GetSize();
if (!m_dwFileLen)
@@ -285,7 +313,7 @@ bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) {
case PDF_DATAAVAIL_CROSSREF:
return CheckCrossRef(pHints);
case PDF_DATAAVAIL_CROSSREF_ITEM:
- return CheckCrossRefItem(pHints);
+ return CheckCrossRefItem();
case PDF_DATAAVAIL_CROSSREF_STREAM:
return CheckAllCrossRefStream(pHints);
case PDF_DATAAVAIL_TRAILER:
@@ -955,14 +983,12 @@ bool CPDF_DataAvail::GetNextChar(uint8_t& ch) {
return true;
}
-bool CPDF_DataAvail::CheckCrossRefItem(DownloadHints* pHints) {
- int32_t iSize = 0;
+bool CPDF_DataAvail::CheckCrossRefItem() {
CFX_ByteString token;
while (1) {
if (!GetNextToken(&token)) {
- iSize = static_cast<int32_t>(
- m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
- pHints->AddSegment(m_Pos, iSize);
+ if (!m_pFileRead->has_read_problems())
+ m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
@@ -1567,6 +1593,10 @@ bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) {
return true;
}
+CFX_RetainPtr<IFX_SeekableReadStream> CPDF_DataAvail::GetFileRead() const {
+ return m_pFileRead;
+}
+
int CPDF_DataAvail::GetPageCount() const {
if (m_pLinearized)
return m_pLinearized->GetPageCount();
diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h
index eb45c144c8..f19b36d375 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.h
+++ b/core/fpdfapi/parser/cpdf_data_avail.h
@@ -21,6 +21,7 @@ class CPDF_HintTables;
class CPDF_IndirectObjectHolder;
class CPDF_LinearizedHeader;
class CPDF_Parser;
+class CPDF_ReadValidator;
enum PDF_DATAAVAIL_STATUS {
PDF_DATAAVAIL_HEADER = 0,
@@ -107,9 +108,7 @@ class CPDF_DataAvail final {
DocFormStatus IsFormAvail(DownloadHints* pHints);
DocLinearizationStatus IsLinearizedPDF();
bool IsLinearized();
- CFX_RetainPtr<IFX_SeekableReadStream> GetFileRead() const {
- return m_pFileRead;
- }
+ CFX_RetainPtr<IFX_SeekableReadStream> GetFileRead() const;
int GetPageCount() const;
CPDF_Dictionary* GetPage(int index);
@@ -137,7 +136,7 @@ class CPDF_DataAvail final {
bool CheckHintTables(DownloadHints* pHints);
bool CheckEnd(DownloadHints* pHints);
bool CheckCrossRef(DownloadHints* pHints);
- bool CheckCrossRefItem(DownloadHints* pHints);
+ bool CheckCrossRefItem();
bool CheckTrailer(DownloadHints* pHints);
bool CheckRoot(DownloadHints* pHints);
bool CheckInfo(DownloadHints* pHints);
@@ -194,7 +193,7 @@ class CPDF_DataAvail final {
bool ValidateForm();
FileAvail* const m_pFileAvail;
- CFX_RetainPtr<IFX_SeekableReadStream> m_pFileRead;
+ CFX_RetainPtr<CPDF_ReadValidator> m_pFileRead;
CPDF_Parser m_parser;
CPDF_SyntaxParser m_syntaxParser;
std::unique_ptr<CPDF_Object> m_pRoot;
diff --git a/core/fpdfapi/parser/cpdf_read_validator.cpp b/core/fpdfapi/parser/cpdf_read_validator.cpp
new file mode 100644
index 0000000000..148ecfd424
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_read_validator.cpp
@@ -0,0 +1,94 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_read_validator.h"
+
+#include <algorithm>
+
+#include "third_party/base/logging.h"
+
+namespace {
+
+constexpr FX_FILESIZE kAlignBlockValue = 512;
+
+FX_FILESIZE AlignDown(FX_FILESIZE offset) {
+ return offset > 0 ? (offset - offset % kAlignBlockValue) : 0;
+}
+
+FX_FILESIZE AlignUp(FX_FILESIZE offset) {
+ FX_SAFE_FILESIZE safe_result = AlignDown(offset);
+ safe_result += kAlignBlockValue;
+ if (safe_result.IsValid())
+ return safe_result.ValueOrDie();
+ return offset;
+}
+
+} // namespace
+
+CPDF_ReadValidator::CPDF_ReadValidator(
+ const CFX_RetainPtr<IFX_SeekableReadStream>& file_read,
+ CPDF_DataAvail::FileAvail* file_avail)
+ : file_read_(file_read),
+ file_avail_(file_avail),
+ read_error_(false),
+ has_unavailable_data_(false) {
+ ASSERT(file_read_);
+}
+
+CPDF_ReadValidator::~CPDF_ReadValidator() {}
+
+void CPDF_ReadValidator::ResetErrors() {
+ read_error_ = false;
+ has_unavailable_data_ = false;
+}
+
+bool CPDF_ReadValidator::ReadBlock(void* buffer,
+ FX_FILESIZE offset,
+ size_t size) {
+ // correct values checks:
+ if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(size))
+ return false;
+
+ FX_SAFE_FILESIZE end_offset = offset;
+ end_offset += size;
+ if (!end_offset.IsValid() || end_offset.ValueOrDie() > GetSize())
+ return false;
+
+ if (!file_avail_ ||
+ file_avail_->IsDataAvail(offset, static_cast<uint32_t>(size))) {
+ if (file_read_->ReadBlock(buffer, offset, size))
+ return true;
+ read_error_ = true;
+ }
+ has_unavailable_data_ = true;
+ ScheduleDownload(offset, size);
+ return false;
+}
+
+FX_FILESIZE CPDF_ReadValidator::GetSize() {
+ return file_read_->GetSize();
+}
+
+void CPDF_ReadValidator::ScheduleDownload(FX_FILESIZE offset, size_t size) {
+ if (!hints_ || size == 0)
+ return;
+
+ const FX_FILESIZE start_segment_offset = AlignDown(offset);
+ FX_SAFE_FILESIZE end_segment_offset = offset;
+ end_segment_offset += size;
+ if (!end_segment_offset.IsValid()) {
+ NOTREACHED();
+ return;
+ }
+ end_segment_offset =
+ std::min(GetSize(), AlignUp(end_segment_offset.ValueOrDie()));
+
+ FX_SAFE_UINT32 segment_size = end_segment_offset;
+ segment_size -= start_segment_offset;
+ if (!segment_size.IsValid()) {
+ NOTREACHED();
+ return;
+ }
+ hints_->AddSegment(start_segment_offset, segment_size.ValueOrDie());
+}
diff --git a/core/fpdfapi/parser/cpdf_read_validator.h b/core/fpdfapi/parser/cpdf_read_validator.h
new file mode 100644
index 0000000000..da8acfe23b
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_read_validator.h
@@ -0,0 +1,48 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_
+#define CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_
+
+#include "core/fpdfapi/parser/cpdf_data_avail.h"
+
+class CPDF_ReadValidator : public IFX_SeekableReadStream {
+ public:
+ template <typename T, typename... Args>
+ friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);
+
+ void SetDownloadHints(CPDF_DataAvail::DownloadHints* hints) {
+ hints_ = hints;
+ }
+
+ bool read_error() const { return read_error_; }
+ bool has_unavailable_data() const { return has_unavailable_data_; }
+
+ bool has_read_problems() const {
+ return read_error() || has_unavailable_data();
+ }
+
+ void ResetErrors();
+
+ // IFX_SeekableReadStream overrides:
+ bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override;
+ FX_FILESIZE GetSize() override;
+
+ private:
+ CPDF_ReadValidator(const CFX_RetainPtr<IFX_SeekableReadStream>& file_read,
+ CPDF_DataAvail::FileAvail* file_avail);
+ ~CPDF_ReadValidator() override;
+
+ void ScheduleDownload(FX_FILESIZE offset, size_t size);
+
+ CFX_RetainPtr<IFX_SeekableReadStream> file_read_;
+ CFX_UnownedPtr<CPDF_DataAvail::FileAvail> file_avail_;
+
+ CFX_UnownedPtr<CPDF_DataAvail::DownloadHints> hints_;
+
+ bool read_error_;
+ bool has_unavailable_data_;
+};
+
+#endif // CORE_FPDFAPI_PARSER_CPDF_READ_VALIDATOR_H_
diff --git a/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp b/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp
new file mode 100644
index 0000000000..f0e47f552c
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_read_validator_unittest.cpp
@@ -0,0 +1,180 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_read_validator.h"
+
+#include <limits>
+#include <utility>
+#include <vector>
+
+#include "core/fxcrt/cfx_memorystream.h"
+#include "core/fxcrt/fx_stream.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+constexpr uint32_t kTestDataSize = 64 * 1024 - 467;
+
+std::pair<FX_FILESIZE, FX_FILESIZE> MakeRange(uint32_t start, uint32_t end) {
+ return std::pair<FX_FILESIZE, FX_FILESIZE>(start, end);
+}
+
+class MockFileAvail : public CPDF_DataAvail::FileAvail {
+ public:
+ MockFileAvail() : available_range_(0, 0) {}
+ ~MockFileAvail() override {}
+
+ bool IsDataAvail(FX_FILESIZE offset, uint32_t size) override {
+ return available_range_.first <= offset &&
+ available_range_.second >= static_cast<FX_FILESIZE>(offset + size);
+ }
+
+ void SetAvailableRange(const std::pair<FX_FILESIZE, FX_FILESIZE>& range) {
+ available_range_ = range;
+ }
+
+ void SetAvailableRange(uint32_t start, uint32_t end) {
+ SetAvailableRange(MakeRange(start, end));
+ }
+
+ private:
+ std::pair<FX_FILESIZE, FX_FILESIZE> available_range_;
+};
+
+class MockDownloadHints : public CPDF_DataAvail::DownloadHints {
+ public:
+ MockDownloadHints() : last_requested_range_(0, 0) {}
+ ~MockDownloadHints() override {}
+
+ void AddSegment(FX_FILESIZE offset, uint32_t size) override {
+ last_requested_range_.first = offset;
+ last_requested_range_.second = offset + size;
+ }
+
+ const std::pair<FX_FILESIZE, FX_FILESIZE>& GetLastRequstedRange() const {
+ return last_requested_range_;
+ }
+
+ void Reset() { last_requested_range_ = MakeRange(0, 0); }
+
+ private:
+ std::pair<FX_FILESIZE, FX_FILESIZE> last_requested_range_;
+};
+
+class InvalidReader : public IFX_SeekableReadStream {
+ public:
+ template <typename T, typename... Args>
+ friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);
+
+ // IFX_SeekableReadStream overrides:
+ bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override {
+ return false;
+ }
+ FX_FILESIZE GetSize() override { return kTestDataSize; }
+
+ private:
+ InvalidReader() {}
+ ~InvalidReader() override {}
+};
+
+} // namespace
+
+TEST(CPDF_ReadValidatorTest, UnavailableData) {
+ std::vector<uint8_t> test_data(kTestDataSize);
+ auto file = pdfium::MakeRetain<CFX_MemoryStream>(test_data.data(),
+ test_data.size(), false);
+ MockFileAvail file_avail;
+ auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, &file_avail);
+
+ std::vector<uint8_t> read_buffer(100);
+ EXPECT_FALSE(
+ validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size()));
+
+ EXPECT_FALSE(validator->read_error());
+ EXPECT_TRUE(validator->has_unavailable_data());
+
+ validator->ResetErrors();
+
+ file_avail.SetAvailableRange(5000, 5000 + read_buffer.size());
+
+ EXPECT_TRUE(
+ validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size()));
+ EXPECT_FALSE(validator->read_error());
+ EXPECT_FALSE(validator->has_unavailable_data());
+}
+
+TEST(CPDF_ReadValidatorTest, UnavailableDataWithHints) {
+ std::vector<uint8_t> test_data(kTestDataSize);
+ auto file = pdfium::MakeRetain<CFX_MemoryStream>(test_data.data(),
+ test_data.size(), false);
+ MockFileAvail file_avail;
+ auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, &file_avail);
+
+ MockDownloadHints hints;
+ validator->SetDownloadHints(&hints);
+
+ std::vector<uint8_t> read_buffer(100);
+
+ EXPECT_FALSE(
+ validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size()));
+ EXPECT_FALSE(validator->read_error());
+ EXPECT_TRUE(validator->has_unavailable_data());
+
+ // Requested range should be enlarged and aligned.
+ EXPECT_EQ(MakeRange(4608, 5120), hints.GetLastRequstedRange());
+
+ file_avail.SetAvailableRange(hints.GetLastRequstedRange());
+ hints.Reset();
+
+ validator->ResetErrors();
+ EXPECT_TRUE(
+ validator->ReadBlock(read_buffer.data(), 5000, read_buffer.size()));
+ // No new request on already available data.
+ EXPECT_EQ(MakeRange(0, 0), hints.GetLastRequstedRange());
+ EXPECT_FALSE(validator->read_error());
+ EXPECT_FALSE(validator->has_unavailable_data());
+
+ validator->ResetErrors();
+ // Try to read unavailable data at the end of the file.
+ EXPECT_FALSE(validator->ReadBlock(read_buffer.data(),
+ validator->GetSize() - read_buffer.size(),
+ read_buffer.size()));
+ // Should not enlarge request at file end.
+ EXPECT_EQ(validator->GetSize(), hints.GetLastRequstedRange().second);
+ EXPECT_FALSE(validator->read_error());
+ EXPECT_TRUE(validator->has_unavailable_data());
+
+ validator->SetDownloadHints(nullptr);
+}
+
+TEST(CPDF_ReadValidatorTest, ReadError) {
+ auto file = pdfium::MakeRetain<InvalidReader>();
+ auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, nullptr);
+
+ static const uint32_t kBufferSize = 3 * 1000;
+ std::vector<uint8_t> buffer(kBufferSize);
+
+ EXPECT_FALSE(validator->ReadBlock(buffer.data(), 5000, 100));
+ EXPECT_TRUE(validator->read_error());
+ EXPECT_TRUE(validator->has_unavailable_data());
+}
+
+TEST(CPDF_ReadValidatorTest, IntOverflow) {
+ std::vector<uint8_t> test_data(kTestDataSize);
+ auto file = pdfium::MakeRetain<CFX_MemoryStream>(test_data.data(),
+ test_data.size(), false);
+ MockFileAvail file_avail;
+ auto validator = pdfium::MakeRetain<CPDF_ReadValidator>(file, &file_avail);
+
+ std::vector<uint8_t> read_buffer(100);
+
+ // If we have int overflow, this is equivalent to reading past the end of the
+ // file. This is not a read_error, and in this case there is no unavailable
+ // data. It is just an error in the input params.
+ EXPECT_FALSE(validator->ReadBlock(read_buffer.data(),
+ std::numeric_limits<FX_FILESIZE>::max() - 1,
+ read_buffer.size()));
+ EXPECT_FALSE(validator->read_error());
+ EXPECT_FALSE(validator->has_unavailable_data());
+}