From e3c204392a01870ecc9e8f3b2aa06b2b45306b5a Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Wed, 28 Mar 2018 18:28:45 +0000 Subject: Use ByteStringView in parsers This CL converts the CPDF_SimpleParser to accept a ByteStringView. Several of the callers of SimpleParser are also updated to use a ByteStringView instead of a raw (pointer, size) pair. Change-Id: Ic2df3a06f92e77b53745a0419b44368142f9d8e6 Reviewed-on: https://pdfium-review.googlesource.com/29351 Commit-Queue: dsinclair Reviewed-by: Tom Sepez --- core/fpdfapi/parser/cpdf_simple_parser.cpp | 163 +++++++++------------ core/fpdfapi/parser/cpdf_simple_parser.h | 12 +- .../fpdfapi/parser/cpdf_simple_parser_unittest.cpp | 6 +- core/fpdfapi/parser/cpdf_stream_acc.h | 2 + 4 files changed, 79 insertions(+), 104 deletions(-) (limited to 'core/fpdfapi/parser') diff --git a/core/fpdfapi/parser/cpdf_simple_parser.cpp b/core/fpdfapi/parser/cpdf_simple_parser.cpp index 9e3bf54022..47ce1ad55e 100644 --- a/core/fpdfapi/parser/cpdf_simple_parser.cpp +++ b/core/fpdfapi/parser/cpdf_simple_parser.cpp @@ -10,147 +10,124 @@ #include "core/fpdfapi/parser/fpdf_parser_utility.h" -CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize) - : m_pData(pData), m_dwSize(dwSize), m_dwCurPos(0) {} +CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str) : data_(str) {} -CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str) - : m_pData(str.raw_str()), m_dwSize(str.GetLength()), m_dwCurPos(0) {} +CPDF_SimpleParser::~CPDF_SimpleParser() = default; -std::pair CPDF_SimpleParser::ParseWord() { - const uint8_t* pStart = nullptr; - uint8_t dwSize = 0; +ByteStringView CPDF_SimpleParser::GetWord() { uint8_t ch; // Skip whitespace and comment lines. 
while (1) { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); + if (data_.GetLength() <= cur_pos_) + return ByteStringView(); - ch = m_pData[m_dwCurPos++]; + ch = data_[cur_pos_++]; while (PDFCharIsWhitespace(ch)) { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); - ch = m_pData[m_dwCurPos++]; + if (data_.GetLength() <= cur_pos_) + return ByteStringView(); + ch = data_[cur_pos_++]; } if (ch != '%') break; while (1) { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); + if (data_.GetLength() <= cur_pos_) + return ByteStringView(); - ch = m_pData[m_dwCurPos++]; + ch = data_[cur_pos_++]; if (PDFCharIsLineEnding(ch)) break; } } - uint32_t start_pos = m_dwCurPos - 1; - pStart = m_pData + start_pos; + uint8_t dwSize = 0; + uint32_t start_pos = cur_pos_ - 1; if (PDFCharIsDelimiter(ch)) { // Find names if (ch == '/') { while (1) { - if (m_dwSize <= m_dwCurPos) + if (data_.GetLength() <= cur_pos_) break; - ch = m_pData[m_dwCurPos++]; + ch = data_[cur_pos_++]; if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { - m_dwCurPos--; - dwSize = m_dwCurPos - start_pos; + cur_pos_--; + dwSize = cur_pos_ - start_pos; break; } } - return std::make_pair(pStart, dwSize); + return data_.Mid(start_pos, dwSize); } dwSize = 1; if (ch == '<') { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); + if (data_.GetLength() <= cur_pos_) + return data_.Mid(start_pos, dwSize); - ch = m_pData[m_dwCurPos++]; - if (ch == '<') + ch = data_[cur_pos_++]; + if (ch == '<') { dwSize = 2; - else - m_dwCurPos--; + } else { + while (cur_pos_ < data_.GetLength() && data_[cur_pos_] != '>') + cur_pos_++; + + if (cur_pos_ < data_.GetLength()) + cur_pos_++; + + dwSize = cur_pos_ - start_pos; + } } else if (ch == '>') { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); + if (data_.GetLength() <= cur_pos_) + return data_.Mid(start_pos, dwSize); - ch = m_pData[m_dwCurPos++]; + ch = data_[cur_pos_++]; if (ch == '>') 
dwSize = 2; else - m_dwCurPos--; + cur_pos_--; + } else if (ch == '(') { + int level = 1; + while (cur_pos_ < data_.GetLength()) { + if (data_[cur_pos_] == ')') { + level--; + if (level == 0) + break; + } + + if (data_[cur_pos_] == '\\') { + if (data_.GetLength() <= cur_pos_) + break; + + cur_pos_++; + } else if (data_[cur_pos_] == '(') { + level++; + } + if (data_.GetLength() <= cur_pos_) + break; + + cur_pos_++; + } + if (cur_pos_ < data_.GetLength()) + cur_pos_++; + + dwSize = cur_pos_ - start_pos; } - return std::make_pair(pStart, dwSize); + return data_.Mid(start_pos, dwSize); } dwSize = 1; - while (1) { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); - ch = m_pData[m_dwCurPos++]; + while (cur_pos_ < data_.GetLength()) { + ch = data_[cur_pos_++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { - m_dwCurPos--; + cur_pos_--; break; } dwSize++; } - return std::make_pair(pStart, dwSize); -} - -ByteStringView CPDF_SimpleParser::GetWord() { - const uint8_t* pStart; - uint32_t dwSize; - std::tie(pStart, dwSize) = ParseWord(); - - if (dwSize != 1) - return ByteStringView(pStart, dwSize); - - if (pStart[0] == '<') { - while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') - m_dwCurPos++; - - if (m_dwCurPos < m_dwSize) - m_dwCurPos++; - - return ByteStringView(pStart, - static_cast(m_dwCurPos - (pStart - m_pData))); - } - - if (pStart[0] == '(') { - int level = 1; - while (m_dwCurPos < m_dwSize) { - if (m_pData[m_dwCurPos] == ')') { - level--; - if (level == 0) - break; - } - - if (m_pData[m_dwCurPos] == '\\') { - if (m_dwSize <= m_dwCurPos) - break; - - m_dwCurPos++; - } else if (m_pData[m_dwCurPos] == '(') { - level++; - } - if (m_dwSize <= m_dwCurPos) - break; - - m_dwCurPos++; - } - if (m_dwCurPos < m_dwSize) - m_dwCurPos++; - - return ByteStringView(pStart, - static_cast(m_dwCurPos - (pStart - m_pData))); - } - return ByteStringView(pStart, dwSize); + return data_.Mid(start_pos, dwSize); } bool 
CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token, @@ -160,9 +137,9 @@ bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token, std::vector pBuf(nParams); int buf_index = 0; int buf_count = 0; - m_dwCurPos = 0; + cur_pos_ = 0; while (1) { - pBuf[buf_index++] = m_dwCurPos; + pBuf[buf_index++] = cur_pos_; if (buf_index == nParams) buf_index = 0; @@ -178,7 +155,7 @@ bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token, if (buf_count < nParams) continue; - m_dwCurPos = pBuf[buf_index]; + cur_pos_ = pBuf[buf_index]; return true; } } diff --git a/core/fpdfapi/parser/cpdf_simple_parser.h b/core/fpdfapi/parser/cpdf_simple_parser.h index 659039e6fa..f02a58c98b 100644 --- a/core/fpdfapi/parser/cpdf_simple_parser.h +++ b/core/fpdfapi/parser/cpdf_simple_parser.h @@ -14,8 +14,8 @@ class CPDF_SimpleParser { public: - CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize); explicit CPDF_SimpleParser(const ByteStringView& str); + ~CPDF_SimpleParser(); ByteStringView GetWord(); @@ -23,15 +23,11 @@ class CPDF_SimpleParser { // and move the current position to the start of those parameters. bool FindTagParamFromStart(const ByteStringView& token, int nParams); - // For testing only. 
- uint32_t GetCurPos() const { return m_dwCurPos; } + uint32_t GetCurPosForTest() const { return cur_pos_; } private: - std::pair ParseWord(); - - const uint8_t* m_pData; - uint32_t m_dwSize; - uint32_t m_dwCurPos; + const ByteStringView data_; + uint32_t cur_pos_ = 0; }; #endif // CORE_FPDFAPI_PARSER_CPDF_SIMPLE_PARSER_H_ diff --git a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp index e8d3b7142a..b53b6c6c7f 100644 --- a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp @@ -49,7 +49,7 @@ TEST(SimpleParserTest, GetWord) { }; for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { const pdfium::StrFuncTestData& data = test_data[i]; - CPDF_SimpleParser parser(data.input, data.input_size); + CPDF_SimpleParser parser(ByteStringView(data.input, data.input_size)); ByteStringView word = parser.GetWord(); EXPECT_EQ(data.expected_size, word.GetLength()) << " for case " << i; if (data.expected_size != word.GetLength()) @@ -88,10 +88,10 @@ TEST(SimpleParserTest, FindTagParamFromStart) { }; for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { const FindTagTestStruct& data = test_data[i]; - CPDF_SimpleParser parser(data.input, data.input_size); + CPDF_SimpleParser parser(ByteStringView(data.input, data.input_size)); EXPECT_EQ(data.result, parser.FindTagParamFromStart(data.token, data.num_params)) << " for case " << i; - EXPECT_EQ(data.result_pos, parser.GetCurPos()) << " for case " << i; + EXPECT_EQ(data.result_pos, parser.GetCurPosForTest()) << " for case " << i; } } diff --git a/core/fpdfapi/parser/cpdf_stream_acc.h b/core/fpdfapi/parser/cpdf_stream_acc.h index d54e000097..ac5253a68b 100644 --- a/core/fpdfapi/parser/cpdf_stream_acc.h +++ b/core/fpdfapi/parser/cpdf_stream_acc.h @@ -30,6 +30,8 @@ class CPDF_StreamAcc : public Retainable { const CPDF_Stream* GetStream() const { return m_pStream.Get(); } CPDF_Dictionary* GetDict() const; + ByteStringView 
GetDataView() { return ByteStringView(GetData(), GetSize()); } + const uint8_t* GetData() const; uint8_t* GetData(); uint32_t GetSize() const; -- cgit v1.2.3