From e3c204392a01870ecc9e8f3b2aa06b2b45306b5a Mon Sep 17 00:00:00 2001
From: Dan Sinclair <dsinclair@chromium.org>
Date: Wed, 28 Mar 2018 18:28:45 +0000
Subject: Use ByteStringView in parsers

This CL converts the CPDF_SimpleParser to accept a ByteStringView.
Several of the callers of SimpleParser are also updated to use a
ByteStringView instead of <char*,size>.

Change-Id: Ic2df3a06f92e77b53745a0419b44368142f9d8e6
Reviewed-on: https://pdfium-review.googlesource.com/29351
Commit-Queue: dsinclair <dsinclair@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
---
 core/fpdfapi/parser/cpdf_simple_parser.cpp         | 163 +++++++++------------
 core/fpdfapi/parser/cpdf_simple_parser.h           |  12 +-
 .../fpdfapi/parser/cpdf_simple_parser_unittest.cpp |   6 +-
 core/fpdfapi/parser/cpdf_stream_acc.h              |   2 +
 4 files changed, 79 insertions(+), 104 deletions(-)

(limited to 'core/fpdfapi/parser')
diff --git a/core/fpdfapi/parser/cpdf_simple_parser.cpp b/core/fpdfapi/parser/cpdf_simple_parser.cpp
index 9e3bf54022..47ce1ad55e 100644
--- a/core/fpdfapi/parser/cpdf_simple_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_simple_parser.cpp
@@ -10,147 +10,124 @@
 
 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
 
-CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize)
-    : m_pData(pData), m_dwSize(dwSize), m_dwCurPos(0) {}
+CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str) : data_(str) {}
 
-CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str)
-    : m_pData(str.raw_str()), m_dwSize(str.GetLength()), m_dwCurPos(0) {}
+CPDF_SimpleParser::~CPDF_SimpleParser() = default;
 
-std::pair<const uint8_t*, uint32_t> CPDF_SimpleParser::ParseWord() {
-  const uint8_t* pStart = nullptr;
-  uint8_t dwSize = 0;
+ByteStringView CPDF_SimpleParser::GetWord() {
   uint8_t ch;
 
   // Skip whitespace and comment lines.
   while (1) {
-    if (m_dwSize <= m_dwCurPos)
-      return std::make_pair(pStart, dwSize);
+    if (data_.GetLength() <= cur_pos_)
+      return ByteStringView();
 
-    ch = m_pData[m_dwCurPos++];
+    ch = data_[cur_pos_++];
     while (PDFCharIsWhitespace(ch)) {
-      if (m_dwSize <= m_dwCurPos)
-        return std::make_pair(pStart, dwSize);
-      ch = m_pData[m_dwCurPos++];
+      if (data_.GetLength() <= cur_pos_)
+        return ByteStringView();
+      ch = data_[cur_pos_++];
     }
 
     if (ch != '%')
       break;
 
     while (1) {
-      if (m_dwSize <= m_dwCurPos)
-        return std::make_pair(pStart, dwSize);
+      if (data_.GetLength() <= cur_pos_)
+        return ByteStringView();
 
-      ch = m_pData[m_dwCurPos++];
+      ch = data_[cur_pos_++];
       if (PDFCharIsLineEnding(ch))
         break;
     }
   }
 
-  uint32_t start_pos = m_dwCurPos - 1;
-  pStart = m_pData + start_pos;
+  uint32_t dwSize = 0;  // 32-bit: word lengths can exceed 255 bytes.
+  uint32_t start_pos = cur_pos_ - 1;
   if (PDFCharIsDelimiter(ch)) {
     // Find names
     if (ch == '/') {
       while (1) {
-        if (m_dwSize <= m_dwCurPos)
+        if (data_.GetLength() <= cur_pos_)
           break;
 
-        ch = m_pData[m_dwCurPos++];
+        ch = data_[cur_pos_++];
         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
-          m_dwCurPos--;
-          dwSize = m_dwCurPos - start_pos;
+          cur_pos_--;
+          dwSize = cur_pos_ - start_pos;
           break;
         }
       }
-      return std::make_pair(pStart, dwSize);
+      return data_.Mid(start_pos, dwSize);
     }
 
     dwSize = 1;
     if (ch == '<') {
-      if (m_dwSize <= m_dwCurPos)
-        return std::make_pair(pStart, dwSize);
+      if (data_.GetLength() <= cur_pos_)
+        return data_.Mid(start_pos, dwSize);
 
-      ch = m_pData[m_dwCurPos++];
-      if (ch == '<')
+      ch = data_[cur_pos_++];
+      if (ch == '<') {
         dwSize = 2;
-      else
-        m_dwCurPos--;
+      } else {
+        while (cur_pos_ < data_.GetLength() && data_[cur_pos_] != '>')
+          cur_pos_++;
+
+        if (cur_pos_ < data_.GetLength())
+          cur_pos_++;
+
+        dwSize = cur_pos_ - start_pos;
+      }
     } else if (ch == '>') {
-      if (m_dwSize <= m_dwCurPos)
-        return std::make_pair(pStart, dwSize);
+      if (data_.GetLength() <= cur_pos_)
+        return data_.Mid(start_pos, dwSize);
 
-      ch = m_pData[m_dwCurPos++];
+      ch = data_[cur_pos_++];
       if (ch == '>')
         dwSize = 2;
       else
-        m_dwCurPos--;
+        cur_pos_--;
+    } else if (ch == '(') {
+      int level = 1;
+      while (cur_pos_ < data_.GetLength()) {
+        if (data_[cur_pos_] == ')') {
+          level--;
+          if (level == 0)
+            break;
+        }
+
+        if (data_[cur_pos_] == '\\') {
+          if (data_.GetLength() <= cur_pos_)
+            break;
+
+          cur_pos_++;
+        } else if (data_[cur_pos_] == '(') {
+          level++;
+        }
+        if (data_.GetLength() <= cur_pos_)
+          break;
+
+        cur_pos_++;
+      }
+      if (cur_pos_ < data_.GetLength())
+        cur_pos_++;
+
+      dwSize = cur_pos_ - start_pos;
     }
-    return std::make_pair(pStart, dwSize);
+    return data_.Mid(start_pos, dwSize);
   }
 
   dwSize = 1;
-  while (1) {
-    if (m_dwSize <= m_dwCurPos)
-      return std::make_pair(pStart, dwSize);
-    ch = m_pData[m_dwCurPos++];
+  while (cur_pos_ < data_.GetLength()) {
+    ch = data_[cur_pos_++];
 
     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
-      m_dwCurPos--;
+      cur_pos_--;
       break;
     }
     dwSize++;
   }
-  return std::make_pair(pStart, dwSize);
-}
-
-ByteStringView CPDF_SimpleParser::GetWord() {
-  const uint8_t* pStart;
-  uint32_t dwSize;
-  std::tie(pStart, dwSize) = ParseWord();
-
-  if (dwSize != 1)
-    return ByteStringView(pStart, dwSize);
-
-  if (pStart[0] == '<') {
-    while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>')
-      m_dwCurPos++;
-
-    if (m_dwCurPos < m_dwSize)
-      m_dwCurPos++;
-
-    return ByteStringView(pStart,
-                          static_cast<size_t>(m_dwCurPos - (pStart - m_pData)));
-  }
-
-  if (pStart[0] == '(') {
-    int level = 1;
-    while (m_dwCurPos < m_dwSize) {
-      if (m_pData[m_dwCurPos] == ')') {
-        level--;
-        if (level == 0)
-          break;
-      }
-
-      if (m_pData[m_dwCurPos] == '\\') {
-        if (m_dwSize <= m_dwCurPos)
-          break;
-
-        m_dwCurPos++;
-      } else if (m_pData[m_dwCurPos] == '(') {
-        level++;
-      }
-      if (m_dwSize <= m_dwCurPos)
-        break;
-
-      m_dwCurPos++;
-    }
-    if (m_dwCurPos < m_dwSize)
-      m_dwCurPos++;
-
-    return ByteStringView(pStart,
-                          static_cast<size_t>(m_dwCurPos - (pStart - m_pData)));
-  }
-  return ByteStringView(pStart, dwSize);
+  return data_.Mid(start_pos, dwSize);
 }
 
 bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token,
@@ -160,9 +137,9 @@ bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token,
   std::vector<uint32_t> pBuf(nParams);
   int buf_index = 0;
   int buf_count = 0;
-  m_dwCurPos = 0;
+  cur_pos_ = 0;
   while (1) {
-    pBuf[buf_index++] = m_dwCurPos;
+    pBuf[buf_index++] = cur_pos_;
     if (buf_index == nParams)
       buf_index = 0;
 
@@ -178,7 +155,7 @@ bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token,
       if (buf_count < nParams)
         continue;
 
-      m_dwCurPos = pBuf[buf_index];
+      cur_pos_ = pBuf[buf_index];
       return true;
     }
   }
diff --git a/core/fpdfapi/parser/cpdf_simple_parser.h b/core/fpdfapi/parser/cpdf_simple_parser.h
index 659039e6fa..f02a58c98b 100644
--- a/core/fpdfapi/parser/cpdf_simple_parser.h
+++ b/core/fpdfapi/parser/cpdf_simple_parser.h
@@ -14,8 +14,8 @@
 
 class CPDF_SimpleParser {
  public:
-  CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize);
   explicit CPDF_SimpleParser(const ByteStringView& str);
+  ~CPDF_SimpleParser();
 
   ByteStringView GetWord();
 
@@ -23,15 +23,11 @@ class CPDF_SimpleParser {
   // and move the current position to the start of those parameters.
   bool FindTagParamFromStart(const ByteStringView& token, int nParams);
 
-  // For testing only.
-  uint32_t GetCurPos() const { return m_dwCurPos; }
+  uint32_t GetCurPosForTest() const { return cur_pos_; }
 
  private:
-  std::pair<const uint8_t*, uint32_t> ParseWord();
-
-  const uint8_t* m_pData;
-  uint32_t m_dwSize;
-  uint32_t m_dwCurPos;
+  const ByteStringView data_;
+  uint32_t cur_pos_ = 0;
 };
 
 #endif  // CORE_FPDFAPI_PARSER_CPDF_SIMPLE_PARSER_H_
diff --git a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
index e8d3b7142a..b53b6c6c7f 100644
--- a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp
@@ -49,7 +49,7 @@ TEST(SimpleParserTest, GetWord) {
   };
   for (size_t i = 0; i < FX_ArraySize(test_data); ++i) {
     const pdfium::StrFuncTestData& data = test_data[i];
-    CPDF_SimpleParser parser(data.input, data.input_size);
+    CPDF_SimpleParser parser(ByteStringView(data.input, data.input_size));
     ByteStringView word = parser.GetWord();
     EXPECT_EQ(data.expected_size, word.GetLength()) << " for case " << i;
     if (data.expected_size != word.GetLength())
@@ -88,10 +88,10 @@ TEST(SimpleParserTest, FindTagParamFromStart) {
   };
   for (size_t i = 0; i < FX_ArraySize(test_data); ++i) {
     const FindTagTestStruct& data = test_data[i];
-    CPDF_SimpleParser parser(data.input, data.input_size);
+    CPDF_SimpleParser parser(ByteStringView(data.input, data.input_size));
     EXPECT_EQ(data.result,
               parser.FindTagParamFromStart(data.token, data.num_params))
         << " for case " << i;
-    EXPECT_EQ(data.result_pos, parser.GetCurPos()) << " for case " << i;
+    EXPECT_EQ(data.result_pos, parser.GetCurPosForTest()) << " for case " << i;
   }
 }
diff --git a/core/fpdfapi/parser/cpdf_stream_acc.h b/core/fpdfapi/parser/cpdf_stream_acc.h
index d54e000097..ac5253a68b 100644
--- a/core/fpdfapi/parser/cpdf_stream_acc.h
+++ b/core/fpdfapi/parser/cpdf_stream_acc.h
@@ -30,6 +30,8 @@ class CPDF_StreamAcc : public Retainable {
   const CPDF_Stream* GetStream() const { return m_pStream.Get(); }
   CPDF_Dictionary* GetDict() const;
 
+  ByteStringView GetDataView() { return ByteStringView(GetData(), GetSize()); }
+
   const uint8_t* GetData() const;
   uint8_t* GetData();
   uint32_t GetSize() const;
-- 
cgit v1.2.3