From 0578244a1a68b413e0843b0ecb9c434bd7b6c8af Mon Sep 17 00:00:00 2001 From: Tom Sepez Date: Wed, 3 May 2017 12:37:07 -0700 Subject: CPDF_SyntaxParser::SearchWord() is always backwards and for whole-words Change-Id: Ic31d9cda5e919a754162e14e69cb63671a3fe8b9 Reviewed-on: https://pdfium-review.googlesource.com/4794 Reviewed-by: dsinclair Commit-Queue: dsinclair Commit-Queue: Tom Sepez --- core/fpdfapi/parser/cpdf_data_avail.cpp | 5 +- core/fpdfapi/parser/cpdf_parser.cpp | 2 +- core/fpdfapi/parser/cpdf_syntax_parser.cpp | 74 ++++++++---------------------- core/fpdfapi/parser/cpdf_syntax_parser.h | 8 +--- 4 files changed, 23 insertions(+), 66 deletions(-) diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 709874ef15..b13b982430 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -748,7 +748,6 @@ bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) { bool CPDF_DataAvail::CheckEnd(DownloadHints* pHints) { uint32_t req_pos = (uint32_t)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0); uint32_t dwSize = (uint32_t)(m_dwFileLen - req_pos); - if (!m_pFileAvail->IsDataAvail(req_pos, dwSize)) { pHints->AddSegment(req_pos, dwSize); return false; @@ -761,12 +760,10 @@ bool CPDF_DataAvail::CheckEnd(DownloadHints* pHints) { buffer, static_cast(dwSize), false); m_syntaxParser.InitParser(file, 0); m_syntaxParser.SetPos(dwSize - 1); - - if (!m_syntaxParser.SearchWord("startxref", true, false, dwSize)) { + if (!m_syntaxParser.BackwardsSearchToWord("startxref", dwSize)) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return true; } - m_syntaxParser.GetNextWord(nullptr); bool bNumber; diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index fc98be5f6e..12ec81bc07 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -159,7 +159,7 @@ CPDF_Parser::Error CPDF_Parser::StartParse( m_pDocument = pDocument; bool bXRefRebuilt = false; - if (m_pSyntax->SearchWord("startxref", true, false, 4096)) { + if (m_pSyntax->BackwardsSearchToWord("startxref", 4096)) { m_SortedOffset.insert(m_pSyntax->GetPos()); m_pSyntax->GetKeyword(); diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.cpp b/core/fpdfapi/parser/cpdf_syntax_parser.cpp index 45a5a8a709..e41736dd86 100644 --- a/core/fpdfapi/parser/cpdf_syntax_parser.cpp +++ b/core/fpdfapi/parser/cpdf_syntax_parser.cpp @@ -90,7 +90,7 @@ bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { return true; } -bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { +bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) { pos += m_HeaderOffset; if (pos >= m_FileLen) return false; @@ -105,7 +105,7 @@ bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { if (!ReadChar(read_pos, read_size)) return false; } - ch = m_pFileBuf[pos - m_BufOffset]; + *ch = m_pFileBuf[pos - m_BufOffset]; return true; } @@ -814,74 +814,38 @@ bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, return true; } -// TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards -// and drop the bool. -bool CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, - bool bWholeWord, - bool bForward, - FX_FILESIZE limit) { +bool CPDF_SyntaxParser::BackwardsSearchToWord(const CFX_ByteStringC& tag, + FX_FILESIZE limit) { int32_t taglen = tag.GetLength(); if (taglen == 0) return false; FX_FILESIZE pos = m_Pos; - int32_t offset = 0; - if (!bForward) - offset = taglen - 1; - - const uint8_t* tag_data = tag.raw_str(); - uint8_t byte; + int32_t offset = taglen - 1; while (1) { - if (bForward) { - if (limit && pos >= m_Pos + limit) - return false; - - if (!GetCharAt(pos, byte)) - return false; - - } else { - if (limit && pos <= m_Pos - limit) - return false; + if (limit && pos <= m_Pos - limit) + return false; - if (!GetCharAtBackward(pos, byte)) - return false; - } + uint8_t byte; + if (!GetCharAtBackward(pos, &byte)) + return false; - if (byte == tag_data[offset]) { - if (bForward) { - offset++; - if (offset < taglen) { - pos++; - continue; - } - } else { - offset--; - if (offset >= 0) { - pos--; - continue; - } + if (byte == tag[offset]) { + offset--; + if (offset >= 0) { + pos--; + continue; } - - FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; - if (!bWholeWord || IsWholeWord(startpos, limit, tag, false)) { - m_Pos = startpos; + if (IsWholeWord(pos, limit, tag, false)) { + m_Pos = pos; return true; } } - - if (bForward) { - offset = byte == tag_data[0] ? 1 : 0; - pos++; - } else { - offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1; - pos--; - } - + offset = byte == tag[taglen - 1] ? taglen - 2 : taglen - 1; + pos--; if (pos < 0) return false; } - - return false; } FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.h b/core/fpdfapi/parser/cpdf_syntax_parser.h index f9a9bd9a8a..4c5e252978 100644 --- a/core/fpdfapi/parser/cpdf_syntax_parser.h +++ b/core/fpdfapi/parser/cpdf_syntax_parser.h @@ -46,11 +46,7 @@ class CPDF_SyntaxParser { CFX_ByteString GetKeyword(); void ToNextLine(); void ToNextWord(); - bool SearchWord(const CFX_ByteStringC& word, - bool bWholeWord, - bool bForward, - FX_FILESIZE limit); - + bool BackwardsSearchToWord(const CFX_ByteStringC& word, FX_FILESIZE limit); FX_FILESIZE FindTag(const CFX_ByteStringC& tag, FX_FILESIZE limit); void SetEncrypt(const CFX_RetainPtr& pCryptoHandler); bool ReadBlock(uint8_t* pBuf, uint32_t size); @@ -68,7 +64,7 @@ class CPDF_SyntaxParser { uint32_t GetDirectNum(); bool ReadChar(FX_FILESIZE read_pos, uint32_t read_size); bool GetNextChar(uint8_t& ch); - bool GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch); + bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch); void GetNextWordInternal(bool* bIsNumber); bool IsWholeWord(FX_FILESIZE startpos, FX_FILESIZE limit, -- cgit v1.2.3