summaryrefslogtreecommitdiff
path: root/core/fpdfapi/parser/cpdf_parser.cpp
diff options
context:
space:
mode:
authorArtem Strygin <art-snake@yandex-team.ru>2017-10-02 22:08:44 +0300
committerChromium commit bot <commit-bot@chromium.org>2017-10-04 15:41:16 +0000
commita5fc8975c865dc3cc90de8ff46ca13fb46c13391 (patch)
tree36b38e781140fc31eeec8a55d85299911117ac2b /core/fpdfapi/parser/cpdf_parser.cpp
parent4db6e37b18648dfe2c94b672276c7bf6554fd9d4 (diff)
downloadpdfium-a5fc8975c865dc3cc90de8ff46ca13fb46c13391.tar.xz
Unify parsing of cross ref table v4.
We can use the 'Prev' value of the first-page cross-reference table trailer to load the main cross-reference table, instead of the 'T' value of the Linearized header (offset of the first entry in the main cross-reference table). This is a better solution because it allows us to check the entry count in the main cross-ref table and to unify the loading of the main cross-ref table with the loading methods used for non-linearized documents. See PDF specification: http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf#678 (page 678, Example 3/part 3) Change-Id: I59dcf3c73a0fb561221ded78e827e40535dbd717 Reviewed-on: https://pdfium-review.googlesource.com/13810 Commit-Queue: Art Snake <art-snake@yandex-team.ru> Reviewed-by: dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'core/fpdfapi/parser/cpdf_parser.cpp')
-rw-r--r--core/fpdfapi/parser/cpdf_parser.cpp58
1 files changed, 15 insertions, 43 deletions
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index c443c8baf2..b6231a7a6a 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -102,8 +102,7 @@ CPDF_Parser::CPDF_Parser()
m_bXRefStream(false),
m_FileVersion(0),
m_pEncryptDict(nullptr),
- m_TrailerData(pdfium::MakeUnique<TrailerData>()),
- m_dwLinearizedFirstPageXRefStartObjNum(0) {}
+ m_TrailerData(pdfium::MakeUnique<TrailerData>()) {}
CPDF_Parser::~CPDF_Parser() {
ReleaseEncryptHandler();
@@ -421,9 +420,8 @@ bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
return true;
}
-bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
- uint32_t dwObjCount) {
- if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
+bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos) {
+ if (!LoadCrossRefV4(xrefpos, false))
return false;
std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
@@ -477,18 +475,6 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
return true;
}
-bool CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
- uint32_t dwObjCount) {
- FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
-
- m_pSyntax->SetPos(dwStartPos);
- std::vector<CrossRefObjData> objects;
- if (!ParseAndAppendCrossRefSubsectionData(0, dwObjCount, &objects))
- return false;
- MergeCrossRefObjectsData(objects);
- return true;
-}
-
bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
uint32_t start_objnum,
uint32_t count,
@@ -1318,9 +1304,10 @@ bool CPDF_Parser::ParseLinearizedHeader() {
if (!m_pLinearized)
return false;
- m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
// Move parser onto first page xref table start.
m_pSyntax->GetNextWord(nullptr);
+
+ m_LastXRefOffset = m_pSyntax->GetPos();
return true;
}
@@ -1340,7 +1327,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
m_bHasParsed = true;
m_pDocument = pDocument;
- FX_FILESIZE dwFirstXRefOffset = m_pSyntax->GetPos();
+ FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
bool bXRefRebuilt = false;
bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
@@ -1350,8 +1337,6 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
bXRefRebuilt = true;
m_LastXRefOffset = 0;
}
- m_dwLinearizedFirstPageXRefStartObjNum =
- m_ObjectInfo.empty() ? 0 : m_ObjectInfo.begin()->first;
if (bLoadV4) {
std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
if (!trailer)
@@ -1423,33 +1408,20 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
}
CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
+ const FX_SAFE_FILESIZE main_xref_offset = GetTrailer()->GetIntegerFor("Prev");
+ if (!main_xref_offset.IsValid())
+ return FORMAT_ERROR;
+
+ if (main_xref_offset.ValueOrDie() == 0)
+ return SUCCESS;
+
const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
m_MetadataObjnum = 0;
- m_pSyntax->SetPos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
-
- uint8_t ch = 0;
- uint32_t dwCount = 0;
- m_pSyntax->GetNextChar(ch);
- while (PDFCharIsWhitespace(ch)) {
- ++dwCount;
- if (m_pSyntax->m_FileLen <=
- (FX_FILESIZE)(m_pSyntax->GetPos() + m_pSyntax->m_HeaderOffset)) {
- break;
- }
- if (!m_pSyntax->GetNextChar(ch))
- return HANDLER_ERROR;
- }
- m_LastXRefOffset += dwCount;
m_ObjectStreamMap.clear();
m_ObjCache.clear();
- // In linearized document, the main cross ref always should start from 0
- // objnum.
- // And should have count equals to first obj number of first page cross ref
- // table.
- if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset,
- m_dwLinearizedFirstPageXRefStartObjNum) &&
- !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
+ if (!LoadLinearizedAllCrossRefV4(main_xref_offset.ValueOrDie()) &&
+ !LoadLinearizedAllCrossRefV5(main_xref_offset.ValueOrDie())) {
m_LastXRefOffset = 0;
return FORMAT_ERROR;
}