summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Artem Strygin <art-snake@yandex-team.ru> 2017-11-02 14:40:38 +0000
committer Chromium commit bot <commit-bot@chromium.org> 2017-11-02 14:40:38 +0000
commit 1beb4a9c5ff7ac58450310493783ef7869f4de71 (patch)
tree 8fc8c18dd575cccbfb22e98611af5c7a850371bb
parent d4ef57288f19317de4e60a3b20425250cd6cd933 (diff)
download pdfium-1beb4a9c5ff7ac58450310493783ef7869f4de71.tar.xz
Unify parsing of linearized header.
Change-Id: I3b55b1331ee97af254c248d4ac91b627c9603b59
Reviewed-on: https://pdfium-review.googlesource.com/13831
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
Reviewed-by: dsinclair <dsinclair@chromium.org>
-rw-r--r-- core/fpdfapi/parser/cpdf_data_avail.cpp 100
-rw-r--r-- core/fpdfapi/parser/cpdf_data_avail.h 4
-rw-r--r-- core/fpdfapi/parser/cpdf_linearized_header.cpp 40
-rw-r--r-- core/fpdfapi/parser/cpdf_linearized_header.h 8
-rw-r--r-- core/fpdfapi/parser/cpdf_parser.cpp 34
-rw-r--r-- core/fpdfapi/parser/cpdf_parser.h 2
6 files changed, 89 insertions, 99 deletions
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp
index 93dd39b87b..2f79e56678 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.cpp
+++ b/core/fpdfapi/parser/cpdf_data_avail.cpp
@@ -108,6 +108,7 @@ CPDF_DataAvail::CPDF_DataAvail(
m_bCurPageDictLoadOK = false;
m_bLinearedDataOK = false;
m_bSupportHintTable = bSupportHintTable;
+ m_bHeaderAvail = false;
}
CPDF_DataAvail::~CPDF_DataAvail() {
@@ -413,25 +414,19 @@ bool CPDF_DataAvail::CheckPages() {
}
bool CPDF_DataAvail::CheckHeader() {
- ASSERT(m_dwFileLen >= 0);
- const uint32_t kReqSize = std::min(static_cast<uint32_t>(m_dwFileLen), 1024U);
- std::vector<uint8_t> buffer(kReqSize);
- {
- const CPDF_ReadValidator::Session read_session(GetValidator().Get());
- m_pFileRead->ReadBlock(buffer.data(), 0, kReqSize);
- if (GetValidator()->has_read_problems())
+ switch (CheckHeaderAndLinearized()) {
+ case DocAvailStatus::DataAvailable:
+ m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE : PDF_DATAAVAIL_END;
+ return true;
+ case DocAvailStatus::DataNotAvailable:
+ return false;
+ case DocAvailStatus::DataError:
+ m_docStatus = PDF_DATAAVAIL_ERROR;
+ return true;
+ default:
+ NOTREACHED();
return false;
}
-
- if (IsLinearizedFile(buffer.data(), kReqSize)) {
- m_docStatus = PDF_DATAAVAIL_FIRSTPAGE;
- return true;
- }
- if (m_docStatus == PDF_DATAAVAIL_ERROR)
- return false;
-
- m_docStatus = PDF_DATAAVAIL_END;
- return true;
}
bool CPDF_DataAvail::CheckFirstPage() {
@@ -504,56 +499,41 @@ std::unique_ptr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
}
CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
- const uint32_t kReqSize = 1024;
- if (!m_pFileAvail->IsDataAvail(0, kReqSize))
- return LinearizationUnknown;
-
- FX_FILESIZE dwSize = m_pFileRead->GetSize();
- if (dwSize < (FX_FILESIZE)kReqSize)
- return LinearizationUnknown;
-
- std::vector<uint8_t> buffer(kReqSize);
- m_pFileRead->ReadBlock(buffer.data(), 0, kReqSize);
- if (IsLinearizedFile(buffer.data(), kReqSize))
- return Linearized;
-
- return NotLinearized;
+ switch (CheckHeaderAndLinearized()) {
+ case DocAvailStatus::DataAvailable:
+ return m_pLinearized ? DocLinearizationStatus::Linearized
+ : DocLinearizationStatus::NotLinearized;
+ case DocAvailStatus::DataNotAvailable:
+ return DocLinearizationStatus::LinearizationUnknown;
+ case DocAvailStatus::DataError:
+ return DocLinearizationStatus::NotLinearized;
+ default:
+ NOTREACHED();
+ return DocLinearizationStatus::LinearizationUnknown;
+ }
}
-bool CPDF_DataAvail::IsLinearized() {
- return !!m_pLinearized;
-}
+CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
+ if (m_bHeaderAvail)
+ return DocAvailStatus::DataAvailable;
-bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) {
- if (m_pLinearized)
- return true;
+ const CPDF_ReadValidator::Session read_session(GetValidator().Get());
+ const int32_t header_offset = GetHeaderOffset(GetValidator());
+ if (GetValidator()->has_read_problems())
+ return DocAvailStatus::DataNotAvailable;
- auto file = pdfium::MakeRetain<CFX_MemoryStream>(
- pData, static_cast<size_t>(dwLen), false);
- int32_t offset = GetHeaderOffset(file);
- if (offset == kInvalidHeaderOffset) {
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
+ if (header_offset == kInvalidHeaderOffset)
+ return DocAvailStatus::DataError;
- m_dwHeaderOffset = offset;
- m_syntaxParser.InitParser(file, offset);
- m_syntaxParser.SetPos(m_syntaxParser.m_HeaderOffset + 9);
+ m_dwHeaderOffset = header_offset;
- bool bNumber;
- ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber);
- if (!bNumber)
- return false;
+ m_syntaxParser.InitParserWithValidator(GetValidator(), header_offset);
+ m_pLinearized = CPDF_LinearizedHeader::Parse(&m_syntaxParser);
+ if (GetValidator()->has_read_problems())
+ return DocAvailStatus::DataNotAvailable;
- uint32_t objnum = FXSYS_atoui(wordObjNum.c_str());
- m_pLinearized = CPDF_LinearizedHeader::CreateForObject(
- ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum));
- if (!m_pLinearized ||
- m_pLinearized->GetFileSize() != m_pFileRead->GetSize()) {
- m_pLinearized.reset();
- return false;
- }
- return true;
+ m_bHeaderAvail = true;
+ return DocAvailStatus::DataAvailable;
}
bool CPDF_DataAvail::CheckEnd() {
diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h
index 2d46be1152..0481408b36 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.h
+++ b/core/fpdfapi/parser/cpdf_data_avail.h
@@ -103,7 +103,6 @@ class CPDF_DataAvail final {
DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints);
DocFormStatus IsFormAvail(DownloadHints* pHints);
DocLinearizationStatus IsLinearizedPDF();
- bool IsLinearized();
RetainPtr<IFX_SeekableReadStream> GetFileRead() const;
int GetPageCount() const;
CPDF_Dictionary* GetPage(int index);
@@ -140,7 +139,7 @@ class CPDF_DataAvail final {
DocFormStatus CheckAcroForm();
bool CheckPageStatus();
- bool IsLinearizedFile(uint8_t* pData, uint32_t dwLen);
+ DocAvailStatus CheckHeaderAndLinearized();
void SetStartOffset(FX_FILESIZE dwOffset);
bool GetNextToken(ByteString* token);
bool GetNextChar(uint8_t& ch);
@@ -219,6 +218,7 @@ class CPDF_DataAvail final {
std::map<uint32_t, std::unique_ptr<CPDF_PageObjectAvail>> m_PagesObjAvail;
std::map<const CPDF_Object*, std::unique_ptr<CPDF_PageObjectAvail>>
m_PagesResourcesAvail;
+ bool m_bHeaderAvail;
};
#endif // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
diff --git a/core/fpdfapi/parser/cpdf_linearized_header.cpp b/core/fpdfapi/parser/cpdf_linearized_header.cpp
index 98cdcc450f..ce22c55f48 100644
--- a/core/fpdfapi/parser/cpdf_linearized_header.cpp
+++ b/core/fpdfapi/parser/cpdf_linearized_header.cpp
@@ -12,10 +12,13 @@
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
#include "third_party/base/ptr_util.h"
namespace {
+constexpr FX_FILESIZE kLinearizedHeaderOffset = 9;
+
template <class T>
bool IsValidNumericDictionaryValue(const CPDF_Dictionary* pDict,
const char* key,
@@ -32,21 +35,48 @@ bool IsValidNumericDictionaryValue(const CPDF_Dictionary* pDict,
return static_cast<T>(raw_value) >= min_value;
}
+bool IsLinearizedHeaderValid(const CPDF_LinearizedHeader* header,
+ FX_FILESIZE file_size) {
+ ASSERT(header);
+ return header->GetFileSize() == file_size &&
+ header->GetMainXRefTableFirstEntryOffset() < file_size &&
+ header->GetPageCount() > 0 &&
+ header->GetFirstPageEndOffset() < file_size &&
+ header->GetLastXRefOffset() < file_size &&
+ header->GetHintStart() < file_size;
+}
+
} // namespace
// static
-std::unique_ptr<CPDF_LinearizedHeader> CPDF_LinearizedHeader::CreateForObject(
- std::unique_ptr<CPDF_Object> pObj) {
- auto pDict = ToDictionary(std::move(pObj));
+std::unique_ptr<CPDF_LinearizedHeader> CPDF_LinearizedHeader::Parse(
+ CPDF_SyntaxParser* parser) {
+ parser->SetPos(kLinearizedHeaderOffset);
+
+ const auto pDict = ToDictionary(
+ parser->GetIndirectObject(nullptr, CPDF_SyntaxParser::ParseType::kLoose));
+
if (!pDict || !pDict->KeyExist("Linearized") ||
!IsValidNumericDictionaryValue<FX_FILESIZE>(pDict.get(), "L", 1) ||
!IsValidNumericDictionaryValue<uint32_t>(pDict.get(), "P", 0, false) ||
!IsValidNumericDictionaryValue<FX_FILESIZE>(pDict.get(), "T", 1) ||
!IsValidNumericDictionaryValue<uint32_t>(pDict.get(), "N", 0) ||
!IsValidNumericDictionaryValue<FX_FILESIZE>(pDict.get(), "E", 1) ||
- !IsValidNumericDictionaryValue<uint32_t>(pDict.get(), "O", 1))
+ !IsValidNumericDictionaryValue<uint32_t>(pDict.get(), "O", 1)) {
return nullptr;
- return pdfium::WrapUnique(new CPDF_LinearizedHeader(pDict.get()));
+ }
+ // Move parser to the start of the xref table for the document's first page.
+ // (skipping endobj keyword)
+ if (parser->GetNextWord(nullptr) != "endobj")
+ return nullptr;
+
+ auto result = pdfium::WrapUnique(new CPDF_LinearizedHeader(pDict.get()));
+ result->m_szLastXRefOffset = parser->GetPos();
+
+ return IsLinearizedHeaderValid(result.get(),
+ parser->GetFileAccess()->GetSize())
+ ? std::move(result)
+ : nullptr;
}
CPDF_LinearizedHeader::CPDF_LinearizedHeader(const CPDF_Dictionary* pDict) {
diff --git a/core/fpdfapi/parser/cpdf_linearized_header.h b/core/fpdfapi/parser/cpdf_linearized_header.h
index 98ae9c650f..d73216059f 100644
--- a/core/fpdfapi/parser/cpdf_linearized_header.h
+++ b/core/fpdfapi/parser/cpdf_linearized_header.h
@@ -14,12 +14,13 @@
class CPDF_Dictionary;
class CPDF_Object;
+class CPDF_SyntaxParser;
class CPDF_LinearizedHeader {
public:
~CPDF_LinearizedHeader();
- static std::unique_ptr<CPDF_LinearizedHeader> CreateForObject(
- std::unique_ptr<CPDF_Object> pObj);
+ static std::unique_ptr<CPDF_LinearizedHeader> Parse(
+ CPDF_SyntaxParser* parser);
// Will only return values > 0.
FX_FILESIZE GetFileSize() const { return m_szFileSize; }
@@ -33,6 +34,8 @@ class CPDF_LinearizedHeader {
FX_FILESIZE GetFirstPageEndOffset() const { return m_szFirstPageEndOffset; }
// Will only return values > 0.
uint32_t GetFirstPageObjNum() const { return m_FirstPageObjNum; }
+ // Will only return values > 0.
+ FX_FILESIZE GetLastXRefOffset() const { return m_szLastXRefOffset; }
bool HasHintTable() const;
// Will only return values > 0.
@@ -51,6 +54,7 @@ class CPDF_LinearizedHeader {
uint32_t m_FirstPageObjNum = 0;
FX_FILESIZE m_szHintStart = 0;
uint32_t m_HintLength = 0;
+ FX_FILESIZE m_szLastXRefOffset = 0;
};
#endif // CORE_FPDFAPI_PARSER_CPDF_LINEARIZED_HEADER_H_
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 6957c84071..7a8f4f9ce7 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -1280,34 +1280,8 @@ uint32_t CPDF_Parser::GetPermissions() const {
return dwPermission;
}
-bool CPDF_Parser::ParseLinearizedHeader() {
- m_pSyntax->SetPos(m_pSyntax->m_HeaderOffset + 9);
-
- FX_FILESIZE SavedPos = m_pSyntax->GetPos();
- bool bIsNumber;
- ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
- if (!bIsNumber)
- return false;
-
- word = m_pSyntax->GetNextWord(&bIsNumber);
- if (!bIsNumber)
- return false;
-
- if (m_pSyntax->GetKeyword() != "obj") {
- m_pSyntax->SetPos(SavedPos);
- return false;
- }
-
- m_pLinearized =
- CPDF_LinearizedHeader::CreateForObject(m_pSyntax->GetObjectBody(nullptr));
- if (!m_pLinearized)
- return false;
-
- // Move parser onto first page xref table start.
- m_pSyntax->GetNextWord(nullptr);
-
- m_LastXRefOffset = m_pSyntax->GetPos();
- return true;
+std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() {
+ return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
}
CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
@@ -1320,12 +1294,14 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
if (!InitSyntaxParser(pFileAccess))
return FORMAT_ERROR;
- if (!ParseLinearizedHeader())
+ m_pLinearized = ParseLinearizedHeader();
+ if (!m_pLinearized)
return StartParseInternal(std::move(pDocument));
m_bHasParsed = true;
m_pDocument = pDocument;
+ m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
bool bXRefRebuilt = false;
bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index a58838e5fb..6c8cfbd0f8 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -166,7 +166,7 @@ class CPDF_Parser {
bool LoadLinearizedAllCrossRefV5(FX_FILESIZE pos);
Error LoadLinearizedMainXRefTable();
RetainPtr<CPDF_StreamAcc> GetObjectStream(uint32_t number);
- bool ParseLinearizedHeader();
+ std::unique_ptr<CPDF_LinearizedHeader> ParseLinearizedHeader();
void SetEncryptDictionary(CPDF_Dictionary* pDict);
void ShrinkObjectMap(uint32_t size);
// A simple check whether the cross reference table matches with