From 6058efdbdc186e120e7e2121c290ac4d820ffbf8 Mon Sep 17 00:00:00 2001 From: Tom Sepez Date: Fri, 6 Apr 2018 23:48:24 +0000 Subject: Add span.h from chromium base. Allows indexing with better bounds-checking to occur. Some small modifications are required to deal with PDFium being intentionally held at C++11 compliance, not C++14. Use in one place as check on correctness. Change-Id: Id2875cf0a93980112bc536a93c4f9ec5306c0dac Reviewed-on: https://pdfium-review.googlesource.com/29671 Commit-Queue: Tom Sepez Reviewed-by: Chris Palmer Reviewed-by: dsinclair --- core/fpdfapi/page/cpdf_streamcontentparser.cpp | 4 +- core/fpdfapi/page/cpdf_streamparser.cpp | 23 +- core/fpdfapi/page/cpdf_streamparser.h | 15 +- core/fpdfapi/page/cpdf_streamparser_unittest.cpp | 11 +- core/fxcrt/unowned_ptr.h | 3 +- testing/libfuzzer/pdf_streamparser_fuzzer.cc | 3 +- third_party/BUILD.gn | 1 + third_party/base/span.h | 336 +++++++++++++++++++++++ 8 files changed, 368 insertions(+), 28 deletions(-) create mode 100644 third_party/base/span.h diff --git a/core/fpdfapi/page/cpdf_streamcontentparser.cpp b/core/fpdfapi/page/cpdf_streamcontentparser.cpp index 697349e987..c251c5ec66 100644 --- a/core/fpdfapi/page/cpdf_streamcontentparser.cpp +++ b/core/fpdfapi/page/cpdf_streamcontentparser.cpp @@ -37,6 +37,7 @@ #include "core/fxge/cfx_graphstatedata.h" #include "third_party/base/logging.h" #include "third_party/base/ptr_util.h" +#include "third_party/base/span.h" #include "third_party/base/stl_util.h" namespace { @@ -1515,7 +1516,8 @@ uint32_t CPDF_StreamContentParser::Parse(const uint8_t* pData, pData); uint32_t InitObjCount = m_pObjectHolder->GetPageObjectList()->size(); - CPDF_StreamParser syntax(pData, dwSize, m_pDocument->GetByteStringPool()); + CPDF_StreamParser syntax(pdfium::make_span(pData, dwSize), + m_pDocument->GetByteStringPool()); CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax); while (1) { uint32_t cost = m_pObjectHolder->GetPageObjectList()->size() - InitObjCount; diff --git a/core/fpdfapi/page/cpdf_streamparser.cpp b/core/fpdfapi/page/cpdf_streamparser.cpp index 3b6d12038f..c6660b799b 100644 --- a/core/fpdfapi/page/cpdf_streamparser.cpp +++ b/core/fpdfapi/page/cpdf_streamparser.cpp @@ -101,13 +101,12 @@ uint32_t DecodeInlineStream(const uint8_t* src_buf, } // namespace -CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize) - : m_Size(dwSize), m_Pos(0), m_WordSize(0), m_pBuf(pData) {} +CPDF_StreamParser::CPDF_StreamParser(pdfium::span span) + : m_pBuf(span) {} -CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, - uint32_t dwSize, +CPDF_StreamParser::CPDF_StreamParser(pdfium::span span, const WeakPtr& pPool) - : m_Size(dwSize), m_Pos(0), m_WordSize(0), m_pBuf(pData), m_pPool(pPool) {} + : m_pPool(pPool), m_pBuf(span) {} CPDF_StreamParser::~CPDF_StreamParser() {} @@ -115,7 +114,7 @@ std::unique_ptr CPDF_StreamParser::ReadInlineStream( CPDF_Document* pDoc, std::unique_ptr pDict, CPDF_Object* pCSObj) { - if (m_Pos == m_Size) + if (m_Pos == m_pBuf.size()) return nullptr; if (PDFCharIsWhitespace(m_pBuf[m_Pos])) @@ -176,17 +175,17 @@ std::unique_ptr CPDF_StreamParser::ReadInlineStream( std::unique_ptr pData; uint32_t dwStreamSize; if (Decoder.IsEmpty()) { - if (OrigSize > m_Size - m_Pos) - OrigSize = m_Size - m_Pos; + if (OrigSize > m_pBuf.size() - m_Pos) + OrigSize = m_pBuf.size() - m_Pos; pData.reset(FX_Alloc(uint8_t, OrigSize)); - memcpy(pData.get(), m_pBuf + m_Pos, OrigSize); + memcpy(pData.get(), &m_pBuf[m_Pos], OrigSize); dwStreamSize = OrigSize; m_Pos += OrigSize; } else { uint8_t* pIgnore = nullptr; uint32_t dwDestSize = OrigSize; dwStreamSize = - DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, + DecodeInlineStream(&m_pBuf[m_Pos], m_pBuf.size() - m_Pos, width, height, Decoder, pParam, &pIgnore, &dwDestSize); FX_Free(pIgnore); if (static_cast(dwStreamSize) < 0) @@ -212,7 +211,7 @@ std::unique_ptr CPDF_StreamParser::ReadInlineStream( } m_Pos = dwSavePos; pData.reset(FX_Alloc(uint8_t, dwStreamSize)); - memcpy(pData.get(), m_pBuf + m_Pos, dwStreamSize); + memcpy(pData.get(), &m_pBuf[m_Pos], dwStreamSize); m_Pos += dwStreamSize; } pDict->SetNewFor("Length", static_cast(dwStreamSize)); @@ -603,5 +602,5 @@ ByteString CPDF_StreamParser::ReadHexString() { } bool CPDF_StreamParser::PositionIsInBounds() const { - return m_Pos < m_Size; + return m_Pos < m_pBuf.size(); } diff --git a/core/fpdfapi/page/cpdf_streamparser.h b/core/fpdfapi/page/cpdf_streamparser.h index bdd07643ce..78727da481 100644 --- a/core/fpdfapi/page/cpdf_streamparser.h +++ b/core/fpdfapi/page/cpdf_streamparser.h @@ -16,14 +16,14 @@ #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fxcrt/string_pool_template.h" #include "core/fxcrt/weak_ptr.h" +#include "third_party/base/span.h" class CPDF_StreamParser { public: enum SyntaxType { EndOfData, Number, Keyword, Name, Others }; - CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize); - CPDF_StreamParser(const uint8_t* pData, - uint32_t dwSize, + explicit CPDF_StreamParser(pdfium::span span); + CPDF_StreamParser(pdfium::span span, const WeakPtr& pPool); ~CPDF_StreamParser(); @@ -51,12 +51,11 @@ class CPDF_StreamParser { ByteString ReadHexString(); bool PositionIsInBounds() const; - uint32_t m_Size; // Length in bytes of m_pBuf. - uint32_t m_Pos; // Current byte position within m_pBuf. - uint32_t m_WordSize; // Current byte position within m_WordBuffer. - const uint8_t* m_pBuf; - std::unique_ptr m_pLastObj; + uint32_t m_Pos = 0; // Current byte position within m_pBuf. + uint32_t m_WordSize = 0; // Current byte position within m_WordBuffer. WeakPtr m_pPool; + std::unique_ptr m_pLastObj; + pdfium::span m_pBuf; uint8_t m_WordBuffer[kMaxWordLength + 1]; // Include space for NUL. }; diff --git a/core/fpdfapi/page/cpdf_streamparser_unittest.cpp b/core/fpdfapi/page/cpdf_streamparser_unittest.cpp index 40a41befe1..d83fedcb7d 100644 --- a/core/fpdfapi/page/cpdf_streamparser_unittest.cpp +++ b/core/fpdfapi/page/cpdf_streamparser_unittest.cpp @@ -4,12 +4,13 @@ #include "core/fpdfapi/page/cpdf_streamparser.h" #include "testing/gtest/include/gtest/gtest.h" +#include "third_party/base/span.h" TEST(cpdf_streamparser, ReadHexString) { { // Position out of bounds. uint8_t data[] = "12ab>"; - CPDF_StreamParser parser(data, 5); + CPDF_StreamParser parser(data); parser.SetPos(6); EXPECT_EQ("", parser.ReadHexString()); } @@ -17,7 +18,7 @@ TEST(cpdf_streamparser, ReadHexString) { { // Regular conversion. uint8_t data[] = "1A2b>abcd"; - CPDF_StreamParser parser(data, 5); + CPDF_StreamParser parser(data); EXPECT_EQ("\x1a\x2b", parser.ReadHexString()); EXPECT_EQ(5u, parser.GetPos()); } @@ -25,7 +26,7 @@ TEST(cpdf_streamparser, ReadHexString) { { // Missing ending > uint8_t data[] = "1A2b"; - CPDF_StreamParser parser(data, 5); + CPDF_StreamParser parser(data); EXPECT_EQ("\x1a\x2b", parser.ReadHexString()); EXPECT_EQ(5u, parser.GetPos()); } @@ -33,14 +34,14 @@ TEST(cpdf_streamparser, ReadHexString) { { // Uneven number of bytes. uint8_t data[] = "1A2>asdf"; - CPDF_StreamParser parser(data, 5); + CPDF_StreamParser parser(data); EXPECT_EQ("\x1a\x20", parser.ReadHexString()); EXPECT_EQ(4u, parser.GetPos()); } { uint8_t data[] = ">"; - CPDF_StreamParser parser(data, 5); + CPDF_StreamParser parser(data); EXPECT_EQ("", parser.ReadHexString()); EXPECT_EQ(1u, parser.GetPos()); } diff --git a/core/fxcrt/unowned_ptr.h b/core/fxcrt/unowned_ptr.h index f9753cde37..b1c9c66b3e 100644 --- a/core/fxcrt/unowned_ptr.h +++ b/core/fxcrt/unowned_ptr.h @@ -32,7 +32,8 @@ // // The array indexing operation [] is not supported on an unowned ptr, // because an unowned ptr expresses a one to one relationship with some -// other heap object. +// other heap object. Use pdfium::span<> for the cases where indexing +// into an unowned array is desired, which performs the same checks. namespace fxcrt { diff --git a/testing/libfuzzer/pdf_streamparser_fuzzer.cc b/testing/libfuzzer/pdf_streamparser_fuzzer.cc index 46113d42c6..4d9a368916 100644 --- a/testing/libfuzzer/pdf_streamparser_fuzzer.cc +++ b/testing/libfuzzer/pdf_streamparser_fuzzer.cc @@ -7,9 +7,10 @@ #include "core/fpdfapi/page/cpdf_streamparser.h" #include "core/fpdfapi/parser/cpdf_object.h" +#include "third_party/base/span.h" extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - CPDF_StreamParser parser(data, size); + CPDF_StreamParser parser(pdfium::make_span(data, size)); while (std::unique_ptr pObj = parser.ReadNextObject(true, false, 0)) continue; diff --git a/third_party/BUILD.gn b/third_party/BUILD.gn index 1bff4347df..8b212e1622 100644 --- a/third_party/BUILD.gn +++ b/third_party/BUILD.gn @@ -562,6 +562,7 @@ jumbo_source_set("pdfium_base") { "base/numerics/safe_math_impl.h", "base/optional.h", "base/ptr_util.h", + "base/span.h", "base/stl_util.h", "base/sys_byteorder.h", "base/template_util.h", diff --git a/third_party/base/span.h b/third_party/base/span.h new file mode 100644 index 0000000000..d8d8f29e7c --- /dev/null +++ b/third_party/base/span.h @@ -0,0 +1,336 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef PDFIUM_THIRD_PARTY_BASE_SPAN_H_ +#define PDFIUM_THIRD_PARTY_BASE_SPAN_H_ + +#include + +#include +#include +#include +#include +#include + +#include "third_party/base/logging.h" + +namespace pdfium { + +template +class span; + +namespace internal { + +template +struct IsSpanImpl : std::false_type {}; + +template +struct IsSpanImpl> : std::true_type {}; + +template +using IsSpan = IsSpanImpl::type>; + +template +struct IsStdArrayImpl : std::false_type {}; + +template +struct IsStdArrayImpl> : std::true_type {}; + +template +using IsStdArray = IsStdArrayImpl::type>; + +template +using IsLegalSpanConversion = std::is_convertible; + +template +using ContainerHasConvertibleData = + IsLegalSpanConversion().data())>::type, + T>; +template +using ContainerHasIntegralSize = + std::is_integral().size())>; + +template +using EnableIfLegalSpanConversion = + typename std::enable_if::value>::type; + +// SFINAE check if Container can be converted to a span. Note that the +// implementation details of this check differ slightly from the requirements in +// the working group proposal: in particular, the proposal also requires that +// the container conversion constructor participate in overload resolution only +// if two additional conditions are true: +// +// 1. Container implements operator[]. +// 2. Container::value_type matches remove_const_t. +// +// The requirements are relaxed slightly here: in particular, not requiring (2) +// means that an immutable span can be easily constructed from a mutable +// container. +template +using EnableIfSpanCompatibleContainer = + typename std::enable_if::value && + !internal::IsStdArray::value && + ContainerHasConvertibleData::value && + ContainerHasIntegralSize::value>::type; + +template +using EnableIfConstSpanCompatibleContainer = + typename std::enable_if::value && + !internal::IsSpan::value && + !internal::IsStdArray::value && + ContainerHasConvertibleData::value && + ContainerHasIntegralSize::value>::type; + +} // namespace internal + +// A span is a value type that represents an array of elements of type T. Since +// it only consists of a pointer to memory with an associated size, it is very +// light-weight. It is cheap to construct, copy, move and use spans, so that +// users are encouraged to use it as a pass-by-value parameter. A span does not +// own the underlying memory, so care must be taken to ensure that a span does +// not outlive the backing store. +// +// span is somewhat analogous to StringPiece, but with arbitrary element types, +// allowing mutation if T is non-const. +// +// span is implicitly convertible from C++ arrays, as well as most [1] +// container-like types that provide a data() and size() method (such as +// std::vector). A mutable span can also be implicitly converted to an +// immutable span. +// +// Consider using a span for functions that take a data pointer and size +// parameter: it allows the function to still act on an array-like type, while +// allowing the caller code to be a bit more concise. +// +// For read-only data access pass a span: the caller can supply either +// a span or a span, while the callee will have a read-only view. +// For read-write access a mutable span is required. +// +// Without span: +// Read-Only: +// // std::string HexEncode(const uint8_t* data, size_t size); +// std::vector data_buffer = GenerateData(); +// std::string r = HexEncode(data_buffer.data(), data_buffer.size()); +// +// Mutable: +// // ssize_t SafeSNPrintf(char* buf, size_t N, const char* fmt, Args...); +// char str_buffer[100]; +// SafeSNPrintf(str_buffer, sizeof(str_buffer), "Pi ~= %lf", 3.14); +// +// With span: +// Read-Only: +// // std::string HexEncode(base::span data); +// std::vector data_buffer = GenerateData(); +// std::string r = HexEncode(data_buffer); +// +// Mutable: +// // ssize_t SafeSNPrintf(base::span, const char* fmt, Args...); +// char str_buffer[100]; +// SafeSNPrintf(str_buffer, "Pi ~= %lf", 3.14); +// +// Spans with "const" and pointers +// ------------------------------- +// +// Const and pointers can get confusing. Here are vectors of pointers and their +// corresponding spans (you can always make the span "more const" too): +// +// const std::vector => base::span +// std::vector => base::span +// const std::vector => base::span +// +// Differences from the working group proposal +// ------------------------------------------- +// +// https://wg21.link/P0122 is the latest working group proposal, Chromium +// currently implements R6. The biggest difference is span does not support a +// static extent template parameter. Other differences are documented in +// subsections below. +// +// Differences from [views.constants]: +// - no dynamic_extent constant +// +// Differences from [span.objectrep]: +// - no as_bytes() +// - no as_writeable_bytes() +// +// Differences in constants and types: +// - no element_type type alias +// - no index_type type alias +// - no different_type type alias +// - no extent constant +// +// Differences from [span.cons]: +// - no constructor from a pointer range +// - no constructor from std::array +// +// Differences from [span.sub]: +// - no templated first() +// - no templated last() +// - no templated subspan() +// - using size_t instead of ptrdiff_t for indexing +// +// Differences from [span.obs]: +// - no size_bytes() +// - using size_t instead of ptrdiff_t to represent size() +// +// Differences from [span.elem]: +// - no operator ()() +// - using size_t instead of ptrdiff_t for indexing + +// [span], class template span +template +class span { + public: + using value_type = typename std::remove_cv::type; + using pointer = T*; + using reference = T&; + using iterator = T*; + using const_iterator = const T*; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + + // [span.cons], span constructors, copy, assignment, and destructor + constexpr span() noexcept : data_(nullptr), size_(0) {} + constexpr span(T* data, size_t size) noexcept : data_(data), size_(size) {} + // TODO(dcheng): Implement construction from a |begin| and |end| pointer. + template + constexpr span(T (&array)[N]) noexcept : span(array, N) {} + // TODO(dcheng): Implement construction from std::array. + // Conversion from a container that provides |T* data()| and |integral_type + // size()|. + template > + constexpr span(Container& container) + : span(container.data(), container.size()) {} + template < + typename Container, + typename = internal::EnableIfConstSpanCompatibleContainer> + span(const Container& container) : span(container.data(), container.size()) {} + constexpr span(const span& other) noexcept = default; + // Conversions from spans of compatible types: this allows a span to be + // seamlessly used as a span, but not the other way around. + template > + constexpr span(const span& other) : span(other.data(), other.size()) {} + span& operator=(const span& other) noexcept = default; + ~span() noexcept = default; + + // [span.sub], span subviews + const span first(size_t count) const { + CHECK(count <= size_); + return span(data_, count); + } + + const span last(size_t count) const { + CHECK(count <= size_); + return span(data_ + (size_ - count), count); + } + + const span subspan(size_t pos, size_t count = -1) const { + const auto npos = static_cast(-1); + CHECK(pos <= size_); + CHECK(count == npos || count <= size_ - pos); + return span(data_ + pos, count == npos ? size_ - pos : count); + } + + // [span.obs], span observers + constexpr size_t size() const noexcept { return size_; } + constexpr bool empty() const noexcept { return size_ == 0; } + + // [span.elem], span element access + const T& operator[](size_t index) const noexcept { + CHECK(index < size_); + return data_[index]; + } + constexpr T* data() const noexcept { return data_; } + + // [span.iter], span iterator support + constexpr iterator begin() const noexcept { return data_; } + constexpr iterator end() const noexcept { return data_ + size_; } + + constexpr const_iterator cbegin() const noexcept { return begin(); } + constexpr const_iterator cend() const noexcept { return end(); } + + constexpr reverse_iterator rbegin() const noexcept { + return reverse_iterator(end()); + } + constexpr reverse_iterator rend() const noexcept { + return reverse_iterator(begin()); + } + + constexpr const_reverse_iterator crbegin() const noexcept { + return const_reverse_iterator(cend()); + } + constexpr const_reverse_iterator crend() const noexcept { + return const_reverse_iterator(cbegin()); + } + + private: + T* data_; + size_t size_; +}; + +// [span.comparison], span comparison operators +// Relational operators. Equality is a element-wise comparison. +template +constexpr bool operator==(span lhs, span rhs) noexcept { + return lhs.size() == rhs.size() && + std::equal(lhs.cbegin(), lhs.cend(), rhs.cbegin()); +} + +template +constexpr bool operator!=(span lhs, span rhs) noexcept { + return !(lhs == rhs); +} + +template +constexpr bool operator<(span lhs, span rhs) noexcept { + return std::lexicographical_compare(lhs.cbegin(), lhs.cend(), rhs.cbegin(), + rhs.cend()); +} + +template +constexpr bool operator<=(span lhs, span rhs) noexcept { + return !(rhs < lhs); +} + +template +constexpr bool operator>(span lhs, span rhs) noexcept { + return rhs < lhs; +} + +template +constexpr bool operator>=(span lhs, span rhs) noexcept { + return !(lhs < rhs); +} + +// Type-deducing helpers for constructing a span. +template +constexpr span make_span(T* data, size_t size) noexcept { + return span(data, size); +} + +template +constexpr span make_span(T (&array)[N]) noexcept { + return span(array); +} + +template > +constexpr span make_span(Container& container) { + return span(container); +} + +template < + typename Container, + typename T = typename std::add_const::type, + typename = internal::EnableIfConstSpanCompatibleContainer> +constexpr span make_span(const Container& container) { + return span(container); +} + +} // namespace pdfium + +#endif // PDFIUM_THIRD_PARTY_BASE_SPAN_H_ -- cgit v1.2.3