From e3c204392a01870ecc9e8f3b2aa06b2b45306b5a Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Wed, 28 Mar 2018 18:28:45 +0000 Subject: Use ByteStringView in parsers This CL converts the CPDF_SimpleParser to accept a ByteStringView. Several of the callers of SimpleParser are also updated to use a ByteStringView instead of . Change-Id: Ic2df3a06f92e77b53745a0419b44368142f9d8e6 Reviewed-on: https://pdfium-review.googlesource.com/29351 Commit-Queue: dsinclair Reviewed-by: Tom Sepez --- core/fpdfapi/font/cpdf_cidfont.cpp | 2 +- core/fpdfapi/font/cpdf_cmap.cpp | 4 +- core/fpdfapi/font/cpdf_cmap.h | 2 +- core/fpdfapi/font/cpdf_tounicodemap.cpp | 2 +- core/fpdfapi/page/cpdf_psengine.cpp | 7 +- core/fpdfapi/page/cpdf_psengine.h | 2 +- core/fpdfapi/page/cpdf_psfunc.cpp | 3 +- core/fpdfapi/parser/cpdf_simple_parser.cpp | 163 +++++++++------------ core/fpdfapi/parser/cpdf_simple_parser.h | 12 +- .../fpdfapi/parser/cpdf_simple_parser_unittest.cpp | 6 +- core/fpdfapi/parser/cpdf_stream_acc.h | 2 + testing/libfuzzer/pdf_cmap_fuzzer.cc | 3 +- testing/libfuzzer/pdf_psengine_fuzzer.cc | 2 +- 13 files changed, 91 insertions(+), 119 deletions(-) diff --git a/core/fpdfapi/font/cpdf_cidfont.cpp b/core/fpdfapi/font/cpdf_cidfont.cpp index f17af41eda..4a96b0abcb 100644 --- a/core/fpdfapi/font/cpdf_cidfont.cpp +++ b/core/fpdfapi/font/cpdf_cidfont.cpp @@ -376,7 +376,7 @@ bool CPDF_CIDFont::Load() { auto pAcc = pdfium::MakeRetain(pStream); pAcc->LoadAllDataFiltered(); m_pCMap = pdfium::MakeRetain(); - m_pCMap->LoadEmbedded(pAcc->GetData(), pAcc->GetSize()); + m_pCMap->LoadEmbedded(pAcc->GetDataView()); } else { return false; } diff --git a/core/fpdfapi/font/cpdf_cmap.cpp b/core/fpdfapi/font/cpdf_cmap.cpp index a654fcced3..e82e86b474 100644 --- a/core/fpdfapi/font/cpdf_cmap.cpp +++ b/core/fpdfapi/font/cpdf_cmap.cpp @@ -290,10 +290,10 @@ void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr, m_bLoaded = true; } -void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) { +void CPDF_CMap::LoadEmbedded(const ByteStringView& data) { m_DirectCharcodeToCIDTable = std::vector(65536); CPDF_CMapParser parser(this); - CPDF_SimpleParser syntax(pData, size); + CPDF_SimpleParser syntax(data); while (1) { ByteStringView word = syntax.GetWord(); if (word.IsEmpty()) { diff --git a/core/fpdfapi/font/cpdf_cmap.h b/core/fpdfapi/font/cpdf_cmap.h index 98a77286d0..3ad74ca187 100644 --- a/core/fpdfapi/font/cpdf_cmap.h +++ b/core/fpdfapi/font/cpdf_cmap.h @@ -53,7 +53,7 @@ class CPDF_CMap : public Retainable { void LoadPredefined(CPDF_CMapManager* pMgr, const ByteString& name, bool bPromptCJK); - void LoadEmbedded(const uint8_t* pData, uint32_t dwSize); + void LoadEmbedded(const ByteStringView& data); bool IsLoaded() const { return m_bLoaded; } bool IsVertWriting() const { return m_bVertical; } diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp index 0b746d7bfc..88904f27f5 100644 --- a/core/fpdfapi/font/cpdf_tounicodemap.cpp +++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp @@ -126,7 +126,7 @@ void CPDF_ToUnicodeMap::Load(CPDF_Stream* pStream) { CIDSet cid_set = CIDSET_UNKNOWN; auto pAcc = pdfium::MakeRetain(pStream); pAcc->LoadAllDataFiltered(); - CPDF_SimpleParser parser(pAcc->GetData(), pAcc->GetSize()); + CPDF_SimpleParser parser(pAcc->GetDataView()); while (1) { ByteStringView word = parser.GetWord(); if (word.IsEmpty()) { diff --git a/core/fpdfapi/page/cpdf_psengine.cpp b/core/fpdfapi/page/cpdf_psengine.cpp index 7ce3a604a1..adfd45e877 100644 --- a/core/fpdfapi/page/cpdf_psengine.cpp +++ b/core/fpdfapi/page/cpdf_psengine.cpp @@ -190,10 +190,9 @@ int CPDF_PSEngine::PopInt() { return static_cast(Pop()); } -bool CPDF_PSEngine::Parse(const char* str, int size) { - CPDF_SimpleParser parser(reinterpret_cast(str), size); - ByteStringView word = parser.GetWord(); - return word == "{" ? m_MainProc.Parse(&parser, 0) : false; +bool CPDF_PSEngine::Parse(const ByteStringView& view) { + CPDF_SimpleParser parser(view); + return parser.GetWord() == "{" && m_MainProc.Parse(&parser, 0); } bool CPDF_PSEngine::DoOperator(PDF_PSOP op) { diff --git a/core/fpdfapi/page/cpdf_psengine.h b/core/fpdfapi/page/cpdf_psengine.h index 63d91d0757..8a054cc9e6 100644 --- a/core/fpdfapi/page/cpdf_psengine.h +++ b/core/fpdfapi/page/cpdf_psengine.h @@ -109,7 +109,7 @@ class CPDF_PSEngine { CPDF_PSEngine(); ~CPDF_PSEngine(); - bool Parse(const char* str, int size); + bool Parse(const ByteStringView& str); bool Execute(); bool DoOperator(PDF_PSOP op); void Reset() { m_StackCount = 0; } diff --git a/core/fpdfapi/page/cpdf_psfunc.cpp b/core/fpdfapi/page/cpdf_psfunc.cpp index 9f101eb91b..af996c056c 100644 --- a/core/fpdfapi/page/cpdf_psfunc.cpp +++ b/core/fpdfapi/page/cpdf_psfunc.cpp @@ -16,8 +16,7 @@ CPDF_PSFunc::~CPDF_PSFunc() {} bool CPDF_PSFunc::v_Init(CPDF_Object* pObj) { auto pAcc = pdfium::MakeRetain(pObj->AsStream()); pAcc->LoadAllDataFiltered(); - return m_PS.Parse(reinterpret_cast(pAcc->GetData()), - pAcc->GetSize()); + return m_PS.Parse(pAcc->GetDataView()); } bool CPDF_PSFunc::v_Call(float* inputs, float* results) const { diff --git a/core/fpdfapi/parser/cpdf_simple_parser.cpp b/core/fpdfapi/parser/cpdf_simple_parser.cpp index 9e3bf54022..47ce1ad55e 100644 --- a/core/fpdfapi/parser/cpdf_simple_parser.cpp +++ b/core/fpdfapi/parser/cpdf_simple_parser.cpp @@ -10,147 +10,124 @@ #include "core/fpdfapi/parser/fpdf_parser_utility.h" -CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize) - : m_pData(pData), m_dwSize(dwSize), m_dwCurPos(0) {} +CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str) : data_(str) {} -CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str) - : m_pData(str.raw_str()), m_dwSize(str.GetLength()), m_dwCurPos(0) {} +CPDF_SimpleParser::~CPDF_SimpleParser() = default; -std::pair CPDF_SimpleParser::ParseWord() { - const uint8_t* pStart = nullptr; - uint8_t dwSize = 0; +ByteStringView CPDF_SimpleParser::GetWord() { uint8_t ch; // Skip whitespace and comment lines. while (1) { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); + if (data_.GetLength() <= cur_pos_) + return ByteStringView(); - ch = m_pData[m_dwCurPos++]; + ch = data_[cur_pos_++]; while (PDFCharIsWhitespace(ch)) { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); - ch = m_pData[m_dwCurPos++]; + if (data_.GetLength() <= cur_pos_) + return ByteStringView(); + ch = data_[cur_pos_++]; } if (ch != '%') break; while (1) { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); + if (data_.GetLength() <= cur_pos_) + return ByteStringView(); - ch = m_pData[m_dwCurPos++]; + ch = data_[cur_pos_++]; if (PDFCharIsLineEnding(ch)) break; } } - uint32_t start_pos = m_dwCurPos - 1; - pStart = m_pData + start_pos; + uint8_t dwSize = 0; + uint32_t start_pos = cur_pos_ - 1; if (PDFCharIsDelimiter(ch)) { // Find names if (ch == '/') { while (1) { - if (m_dwSize <= m_dwCurPos) + if (data_.GetLength() <= cur_pos_) break; - ch = m_pData[m_dwCurPos++]; + ch = data_[cur_pos_++]; if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { - m_dwCurPos--; - dwSize = m_dwCurPos - start_pos; + cur_pos_--; + dwSize = cur_pos_ - start_pos; break; } } - return std::make_pair(pStart, dwSize); + return data_.Mid(start_pos, dwSize); } dwSize = 1; if (ch == '<') { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); + if (data_.GetLength() <= cur_pos_) + return data_.Mid(start_pos, dwSize); - ch = m_pData[m_dwCurPos++]; - if (ch == '<') + ch = data_[cur_pos_++]; + if (ch == '<') { dwSize = 2; - else - m_dwCurPos--; + } else { + while (cur_pos_ < data_.GetLength() && data_[cur_pos_] != '>') + cur_pos_++; + + if (cur_pos_ < data_.GetLength()) + cur_pos_++; + + dwSize = cur_pos_ - start_pos; + } } else if (ch == '>') { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); + if (data_.GetLength() <= cur_pos_) + return data_.Mid(start_pos, dwSize); - ch = m_pData[m_dwCurPos++]; + ch = data_[cur_pos_++]; if (ch == '>') dwSize = 2; else - m_dwCurPos--; + cur_pos_--; + } else if (ch == '(') { + int level = 1; + while (cur_pos_ < data_.GetLength()) { + if (data_[cur_pos_] == ')') { + level--; + if (level == 0) + break; + } + + if (data_[cur_pos_] == '\\') { + if (data_.GetLength() <= cur_pos_) + break; + + cur_pos_++; + } else if (data_[cur_pos_] == '(') { + level++; + } + if (data_.GetLength() <= cur_pos_) + break; + + cur_pos_++; + } + if (cur_pos_ < data_.GetLength()) + cur_pos_++; + + dwSize = cur_pos_ - start_pos; } - return std::make_pair(pStart, dwSize); + return data_.Mid(start_pos, dwSize); } dwSize = 1; - while (1) { - if (m_dwSize <= m_dwCurPos) - return std::make_pair(pStart, dwSize); - ch = m_pData[m_dwCurPos++]; + while (cur_pos_ < data_.GetLength()) { + ch = data_[cur_pos_++]; if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { - m_dwCurPos--; + cur_pos_--; break; } dwSize++; } - return std::make_pair(pStart, dwSize); -} - -ByteStringView CPDF_SimpleParser::GetWord() { - const uint8_t* pStart; - uint32_t dwSize; - std::tie(pStart, dwSize) = ParseWord(); - - if (dwSize != 1) - return ByteStringView(pStart, dwSize); - - if (pStart[0] == '<') { - while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') - m_dwCurPos++; - - if (m_dwCurPos < m_dwSize) - m_dwCurPos++; - - return ByteStringView(pStart, - static_cast(m_dwCurPos - (pStart - m_pData))); - } - - if (pStart[0] == '(') { - int level = 1; - while (m_dwCurPos < m_dwSize) { - if (m_pData[m_dwCurPos] == ')') { - level--; - if (level == 0) - break; - } - - if (m_pData[m_dwCurPos] == '\\') { - if (m_dwSize <= m_dwCurPos) - break; - - m_dwCurPos++; - } else if (m_pData[m_dwCurPos] == '(') { - level++; - } - if (m_dwSize <= m_dwCurPos) - break; - - m_dwCurPos++; - } - if (m_dwCurPos < m_dwSize) - m_dwCurPos++; - - return ByteStringView(pStart, - static_cast(m_dwCurPos - (pStart - m_pData))); - } - return ByteStringView(pStart, dwSize); + return data_.Mid(start_pos, dwSize); } bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token, @@ -160,9 +137,9 @@ bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token, std::vector pBuf(nParams); int buf_index = 0; int buf_count = 0; - m_dwCurPos = 0; + cur_pos_ = 0; while (1) { - pBuf[buf_index++] = m_dwCurPos; + pBuf[buf_index++] = cur_pos_; if (buf_index == nParams) buf_index = 0; @@ -178,7 +155,7 @@ bool CPDF_SimpleParser::FindTagParamFromStart(const ByteStringView& token, if (buf_count < nParams) continue; - m_dwCurPos = pBuf[buf_index]; + cur_pos_ = pBuf[buf_index]; return true; } } diff --git a/core/fpdfapi/parser/cpdf_simple_parser.h b/core/fpdfapi/parser/cpdf_simple_parser.h index 659039e6fa..f02a58c98b 100644 --- a/core/fpdfapi/parser/cpdf_simple_parser.h +++ b/core/fpdfapi/parser/cpdf_simple_parser.h @@ -14,8 +14,8 @@ class CPDF_SimpleParser { public: - CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize); explicit CPDF_SimpleParser(const ByteStringView& str); + ~CPDF_SimpleParser(); ByteStringView GetWord(); @@ -23,15 +23,11 @@ class CPDF_SimpleParser { // and move the current position to the start of those parameters. bool FindTagParamFromStart(const ByteStringView& token, int nParams); - // For testing only. - uint32_t GetCurPos() const { return m_dwCurPos; } + uint32_t GetCurPosForTest() const { return cur_pos_; } private: - std::pair ParseWord(); - - const uint8_t* m_pData; - uint32_t m_dwSize; - uint32_t m_dwCurPos; + const ByteStringView data_; + uint32_t cur_pos_ = 0; }; #endif // CORE_FPDFAPI_PARSER_CPDF_SIMPLE_PARSER_H_ diff --git a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp index e8d3b7142a..b53b6c6c7f 100644 --- a/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp @@ -49,7 +49,7 @@ TEST(SimpleParserTest, GetWord) { }; for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { const pdfium::StrFuncTestData& data = test_data[i]; - CPDF_SimpleParser parser(data.input, data.input_size); + CPDF_SimpleParser parser(ByteStringView(data.input, data.input_size)); ByteStringView word = parser.GetWord(); EXPECT_EQ(data.expected_size, word.GetLength()) << " for case " << i; if (data.expected_size != word.GetLength()) @@ -88,10 +88,10 @@ TEST(SimpleParserTest, FindTagParamFromStart) { }; for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { const FindTagTestStruct& data = test_data[i]; - CPDF_SimpleParser parser(data.input, data.input_size); + CPDF_SimpleParser parser(ByteStringView(data.input, data.input_size)); EXPECT_EQ(data.result, parser.FindTagParamFromStart(data.token, data.num_params)) << " for case " << i; - EXPECT_EQ(data.result_pos, parser.GetCurPos()) << " for case " << i; + EXPECT_EQ(data.result_pos, parser.GetCurPosForTest()) << " for case " << i; } } diff --git a/core/fpdfapi/parser/cpdf_stream_acc.h b/core/fpdfapi/parser/cpdf_stream_acc.h index d54e000097..ac5253a68b 100644 --- a/core/fpdfapi/parser/cpdf_stream_acc.h +++ b/core/fpdfapi/parser/cpdf_stream_acc.h @@ -30,6 +30,8 @@ class CPDF_StreamAcc : public Retainable { const CPDF_Stream* GetStream() const { return m_pStream.Get(); } CPDF_Dictionary* GetDict() const; + ByteStringView GetDataView() { return ByteStringView(GetData(), GetSize()); } + const uint8_t* GetData() const; uint8_t* GetData(); uint32_t GetSize() const; diff --git a/testing/libfuzzer/pdf_cmap_fuzzer.cc b/testing/libfuzzer/pdf_cmap_fuzzer.cc index b0e41d511c..0f2a22dcf2 100644 --- a/testing/libfuzzer/pdf_cmap_fuzzer.cc +++ b/testing/libfuzzer/pdf_cmap_fuzzer.cc @@ -8,7 +8,6 @@ #include "third_party/base/ptr_util.h" extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - auto cmap = pdfium::MakeRetain(); - cmap->LoadEmbedded(data, size); + pdfium::MakeRetain()->LoadEmbedded(ByteStringView(data, size)); return 0; } diff --git a/testing/libfuzzer/pdf_psengine_fuzzer.cc b/testing/libfuzzer/pdf_psengine_fuzzer.cc index fcdfed863e..253b43e381 100644 --- a/testing/libfuzzer/pdf_psengine_fuzzer.cc +++ b/testing/libfuzzer/pdf_psengine_fuzzer.cc @@ -8,7 +8,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { CPDF_PSEngine engine; - if (engine.Parse(reinterpret_cast(data), size)) + if (engine.Parse(ByteStringView(data, size))) engine.Execute(); return 0; } -- cgit v1.2.3