summaryrefslogtreecommitdiff
path: root/core/fpdfapi/parser
diff options
context:
space:
mode:
authorArtem Strygin <art-snake@yandex-team.ru>2017-10-02 22:08:44 +0300
committerChromium commit bot <commit-bot@chromium.org>2017-10-04 15:41:16 +0000
commita5fc8975c865dc3cc90de8ff46ca13fb46c13391 (patch)
tree36b38e781140fc31eeec8a55d85299911117ac2b /core/fpdfapi/parser
parent4db6e37b18648dfe2c94b672276c7bf6554fd9d4 (diff)
downloadpdfium-a5fc8975c865dc3cc90de8ff46ca13fb46c13391.tar.xz
Unify parsing of cross ref table v4.
We can use the 'Prev' value of the first-page cross-reference table trailer to load the main cross-reference table, instead of the 'T' value of the Linearized header (offset of the first entry in the main cross-reference table). This is a better solution because it allows us to check the entry count in the main cross-ref table and to unify loading of the main cross-ref table with the loading methods used for non-linearized documents. See the PDF specification: http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf#678 (page 678, Example 3/part 3) Change-Id: I59dcf3c73a0fb561221ded78e827e40535dbd717 Reviewed-on: https://pdfium-review.googlesource.com/13810 Commit-Queue: Art Snake <art-snake@yandex-team.ru> Reviewed-by: dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'core/fpdfapi/parser')
-rw-r--r--core/fpdfapi/parser/cpdf_data_avail.cpp13
-rw-r--r--core/fpdfapi/parser/cpdf_parser.cpp58
-rw-r--r--core/fpdfapi/parser/cpdf_parser.h4
-rw-r--r--core/fpdfapi/parser/cpdf_parser_embeddertest.cpp16
4 files changed, 43 insertions, 48 deletions
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp
index 38e857f22f..91fc8c1087 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.cpp
+++ b/core/fpdfapi/parser/cpdf_data_avail.cpp
@@ -1171,13 +1171,22 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
return DataError;
if (!m_bMainXRefLoadTried) {
+ ASSERT(m_pDocument->GetParser()->GetTrailer());
+ const FX_SAFE_FILESIZE main_xref_offset =
+ m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
+ if (!main_xref_offset.IsValid())
+ return DataError;
+
+ if (main_xref_offset.ValueOrDie() == 0)
+ return DataAvailable;
+
FX_SAFE_SIZE_T data_size = m_dwFileLen;
- data_size -= m_pLinearized->GetLastXRefOffset();
+ data_size -= main_xref_offset.ValueOrDie();
if (!data_size.IsValid())
return DataError;
if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
- m_pLinearized->GetLastXRefOffset(), data_size.ValueOrDie()))
+ main_xref_offset.ValueOrDie(), data_size.ValueOrDie()))
return DataNotAvailable;
CPDF_Parser::Error eRet =
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index c443c8baf2..b6231a7a6a 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -102,8 +102,7 @@ CPDF_Parser::CPDF_Parser()
m_bXRefStream(false),
m_FileVersion(0),
m_pEncryptDict(nullptr),
- m_TrailerData(pdfium::MakeUnique<TrailerData>()),
- m_dwLinearizedFirstPageXRefStartObjNum(0) {}
+ m_TrailerData(pdfium::MakeUnique<TrailerData>()) {}
CPDF_Parser::~CPDF_Parser() {
ReleaseEncryptHandler();
@@ -421,9 +420,8 @@ bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
return true;
}
-bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
- uint32_t dwObjCount) {
- if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
+bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos) {
+ if (!LoadCrossRefV4(xrefpos, false))
return false;
std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
@@ -477,18 +475,6 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
return true;
}
-bool CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
- uint32_t dwObjCount) {
- FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
-
- m_pSyntax->SetPos(dwStartPos);
- std::vector<CrossRefObjData> objects;
- if (!ParseAndAppendCrossRefSubsectionData(0, dwObjCount, &objects))
- return false;
- MergeCrossRefObjectsData(objects);
- return true;
-}
-
bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
uint32_t start_objnum,
uint32_t count,
@@ -1318,9 +1304,10 @@ bool CPDF_Parser::ParseLinearizedHeader() {
if (!m_pLinearized)
return false;
- m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
// Move parser onto first page xref table start.
m_pSyntax->GetNextWord(nullptr);
+
+ m_LastXRefOffset = m_pSyntax->GetPos();
return true;
}
@@ -1340,7 +1327,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
m_bHasParsed = true;
m_pDocument = pDocument;
- FX_FILESIZE dwFirstXRefOffset = m_pSyntax->GetPos();
+ FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
bool bXRefRebuilt = false;
bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
@@ -1350,8 +1337,6 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
bXRefRebuilt = true;
m_LastXRefOffset = 0;
}
- m_dwLinearizedFirstPageXRefStartObjNum =
- m_ObjectInfo.empty() ? 0 : m_ObjectInfo.begin()->first;
if (bLoadV4) {
std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
if (!trailer)
@@ -1423,33 +1408,20 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
}
CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
+ const FX_SAFE_FILESIZE main_xref_offset = GetTrailer()->GetIntegerFor("Prev");
+ if (!main_xref_offset.IsValid())
+ return FORMAT_ERROR;
+
+ if (main_xref_offset.ValueOrDie() == 0)
+ return SUCCESS;
+
const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
m_MetadataObjnum = 0;
- m_pSyntax->SetPos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
-
- uint8_t ch = 0;
- uint32_t dwCount = 0;
- m_pSyntax->GetNextChar(ch);
- while (PDFCharIsWhitespace(ch)) {
- ++dwCount;
- if (m_pSyntax->m_FileLen <=
- (FX_FILESIZE)(m_pSyntax->GetPos() + m_pSyntax->m_HeaderOffset)) {
- break;
- }
- if (!m_pSyntax->GetNextChar(ch))
- return HANDLER_ERROR;
- }
- m_LastXRefOffset += dwCount;
m_ObjectStreamMap.clear();
m_ObjCache.clear();
- // In linearized document, the main cross ref always should start from 0
- // objnum.
- // And should have count equals to first obj number of first page cross ref
- // table.
- if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset,
- m_dwLinearizedFirstPageXRefStartObjNum) &&
- !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
+ if (!LoadLinearizedAllCrossRefV4(main_xref_offset.ValueOrDie()) &&
+ !LoadLinearizedAllCrossRefV5(main_xref_offset.ValueOrDie())) {
m_LastXRefOffset = 0;
return FORMAT_ERROR;
}
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 7c6a75d8c5..96bd6f43a9 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -162,8 +162,7 @@ class CPDF_Parser {
std::unique_ptr<CPDF_Dictionary> LoadTrailerV4();
Error SetEncryptHandler();
void ReleaseEncryptHandler();
- bool LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount);
- bool LoadLinearizedCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount);
+ bool LoadLinearizedAllCrossRefV4(FX_FILESIZE pos);
bool LoadLinearizedAllCrossRefV5(FX_FILESIZE pos);
Error LoadLinearizedMainXRefTable();
RetainPtr<CPDF_StreamAcc> GetObjectStream(uint32_t number);
@@ -207,7 +206,6 @@ class CPDF_Parser {
ByteString m_Password;
std::unique_ptr<TrailerData> m_TrailerData;
std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
- uint32_t m_dwLinearizedFirstPageXRefStartObjNum;
// A map of object numbers to indirect streams.
std::map<uint32_t, RetainPtr<CPDF_StreamAcc>> m_ObjectStreamMap;
diff --git a/core/fpdfapi/parser/cpdf_parser_embeddertest.cpp b/core/fpdfapi/parser/cpdf_parser_embeddertest.cpp
index 99bc2c2d42..6aa3e2785a 100644
--- a/core/fpdfapi/parser/cpdf_parser_embeddertest.cpp
+++ b/core/fpdfapi/parser/cpdf_parser_embeddertest.cpp
@@ -57,3 +57,19 @@ TEST_F(CPDFParserEmbeddertest, Bug_602650) {
TEST_F(CPDFParserEmbeddertest, Bug_757705) {
EXPECT_TRUE(OpenDocument("bug_757705.pdf"));
}
+
+TEST_F(CPDFParserEmbeddertest, LoadMainCrossRefTable) {
+ EXPECT_TRUE(OpenDocument("feature_linearized_loading.pdf", nullptr, true));
+ // To check that the main cross ref table is loaded correctly, it is enough
+ // to check that the second page was loaded correctly, because the main
+ // cross ref table contains the crossrefs for the second page.
+ EXPECT_EQ(2, GetPageCount());
+ FPDF_PAGE page = LoadPage(1);
+ EXPECT_NE(nullptr, page);
+ FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
+ EXPECT_NE(nullptr, text_page);
+ // The page should not be blank.
+ EXPECT_LT(0, FPDFText_CountChars(text_page));
+ FPDFText_ClosePage(text_page);
+ UnloadPage(page);
+}