From 565fb438dd4a426eddb10a6d35ff92ce90fa8ee1 Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Thu, 2 Nov 2017 15:27:17 +0000 Subject: Unify CPDF_DataAvail::Check[Header/End]. Use CPDF_Parser instead of CPDF_SyntaxParser for parse File Header and tail. Change-Id: I9ca22b927519a6ea280e249ef43721c230faa6ab Bug: Reviewed-on: https://pdfium-review.googlesource.com/13970 Commit-Queue: Art Snake Reviewed-by: dsinclair --- core/fpdfapi/parser/cpdf_data_avail.cpp | 74 ++++++++++----------------------- core/fpdfapi/parser/cpdf_data_avail.h | 3 +- 2 files changed, 24 insertions(+), 53 deletions(-) diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 2f79e56678..61927888cb 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -201,7 +201,6 @@ bool CPDF_DataAvail::LoadAllFile() { } bool CPDF_DataAvail::LoadAllXref() { - m_parser.m_pSyntax->InitParser(m_pFileRead, (uint32_t)m_dwHeaderOffset); if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; @@ -470,8 +469,6 @@ bool CPDF_DataAvail::CheckHintTables() { szHintLength)) return false; - m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset); - auto pHintTables = pdfium::MakeUnique(GetValidator().Get(), m_pLinearized.get()); std::unique_ptr pHintStream = @@ -488,11 +485,11 @@ std::unique_ptr CPDF_DataAvail::ParseIndirectObjectAt( FX_FILESIZE pos, uint32_t objnum, CPDF_IndirectObjectHolder* pObjList) { - const FX_FILESIZE SavedPos = m_syntaxParser.GetPos(); - m_syntaxParser.SetPos(pos); - std::unique_ptr result = m_syntaxParser.GetIndirectObject( + const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos(); + GetSyntaxParser()->SetPos(pos); + std::unique_ptr result = GetSyntaxParser()->GetIndirectObject( pObjList, CPDF_SyntaxParser::ParseType::kLoose); - m_syntaxParser.SetPos(SavedPos); + GetSyntaxParser()->SetPos(SavedPos); return (result && (!objnum || result->GetObjNum() == objnum)) ? std::move(result) : nullptr; @@ -525,10 +522,8 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() { if (header_offset == kInvalidHeaderOffset) return DocAvailStatus::DataError; - m_dwHeaderOffset = header_offset; - - m_syntaxParser.InitParserWithValidator(GetValidator(), header_offset); - m_pLinearized = CPDF_LinearizedHeader::Parse(&m_syntaxParser); + m_parser.m_pSyntax->InitParserWithValidator(GetValidator(), header_offset); + m_pLinearized = m_parser.ParseLinearizedHeader(); if (GetValidator()->has_read_problems()) return DocAvailStatus::DataNotAvailable; @@ -537,41 +532,17 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() { } bool CPDF_DataAvail::CheckEnd() { - const uint32_t req_pos = - (uint32_t)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0); - const uint32_t dwSize = (uint32_t)(m_dwFileLen - req_pos); - std::vector buffer(dwSize); - { - const CPDF_ReadValidator::Session read_session(GetValidator().Get()); - m_pFileRead->ReadBlock(buffer.data(), req_pos, dwSize); - if (GetValidator()->has_read_problems()) - return false; - } - - auto file = pdfium::MakeRetain( - buffer.data(), static_cast(dwSize), false); - m_syntaxParser.InitParser(file, 0); - m_syntaxParser.SetPos(dwSize - 1); - if (!m_syntaxParser.BackwardsSearchToWord("startxref", dwSize)) { - m_docStatus = PDF_DATAAVAIL_LOADALLFILE; - return true; - } - m_syntaxParser.GetNextWord(nullptr); + const CPDF_ReadValidator::Session read_session(GetValidator().Get()); + const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef(); - bool bNumber; - ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber); - if (!bNumber) { - m_docStatus = PDF_DATAAVAIL_ERROR; + if (GetValidator()->has_read_problems()) return false; - } - m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str()); - if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) { - m_docStatus = PDF_DATAAVAIL_LOADALLFILE; - return true; - } - m_dwLastXRefOffset = m_dwXRefOffset; - SetStartOffset(m_dwXRefOffset); - m_docStatus = PDF_DATAAVAIL_CROSSREF; + + m_dwLastXRefOffset = last_xref_offset; + m_dwXRefOffset = last_xref_offset; + SetStartOffset(last_xref_offset); + m_docStatus = + (last_xref_offset > 0) ? PDF_DATAAVAIL_CROSSREF : PDF_DATAAVAIL_ERROR; return true; } @@ -719,12 +690,10 @@ bool CPDF_DataAvail::CheckCrossRef() { } bool CPDF_DataAvail::CheckTrailer() { - m_syntaxParser.InitParser(m_pFileRead, 0); - const CPDF_ReadValidator::Session read_session(GetValidator().Get()); - m_syntaxParser.SetPos(m_dwTrailerOffset); + GetSyntaxParser()->SetPos(m_dwTrailerOffset); const std::unique_ptr pTrailer = - m_syntaxParser.GetObjectBody(nullptr); + GetSyntaxParser()->GetObjectBody(nullptr); if (!pTrailer) { if (!GetValidator()->has_read_problems()) m_docStatus = PDF_DATAAVAIL_ERROR; @@ -1175,6 +1144,11 @@ RetainPtr CPDF_DataAvail::GetValidator() const { return m_pFileRead; } +CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const { + return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get() + : m_parser.m_pSyntax.get(); +} + int CPDF_DataAvail::GetPageCount() const { if (m_pLinearized) return m_pLinearized->GetPageCount(); @@ -1203,10 +1177,8 @@ CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { m_pDocument->SetPageObjNum(index, dwObjNum); // Page object already can be parsed in document. if (!m_pDocument->GetIndirectObject(dwObjNum)) { - m_syntaxParser.InitParser( - m_pFileRead, pdfium::base::checked_cast(szPageStartPos)); m_pDocument->ReplaceIndirectObjectIfHigherGeneration( - dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument)); + dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument)); } if (!ValidatePage(index)) return nullptr; diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h index 0481408b36..772d3350b4 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.h +++ b/core/fpdfapi/parser/cpdf_data_avail.h @@ -169,18 +169,17 @@ class CPDF_DataAvail final { bool IsFirstCheck(uint32_t dwPage); void ResetFirstCheck(uint32_t dwPage); bool ValidatePage(uint32_t dwPage); + CPDF_SyntaxParser* GetSyntaxParser() const; FileAvail* const m_pFileAvail; RetainPtr m_pFileRead; CPDF_Parser m_parser; - CPDF_SyntaxParser m_syntaxParser; std::unique_ptr m_pRoot; uint32_t m_dwRootObjNum; uint32_t m_dwInfoObjNum; std::unique_ptr m_pLinearized; UnownedPtr m_pTrailer; bool m_bDocAvail; - FX_FILESIZE m_dwHeaderOffset; FX_FILESIZE m_dwLastXRefOffset; FX_FILESIZE m_dwXRefOffset; FX_FILESIZE m_dwTrailerOffset; -- cgit v1.2.3