summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: art-snake <art-snake@yandex-team.ru> 2016-11-07 08:42:04 -0800
committer: Commit bot <commit-bot@chromium.org> 2016-11-07 08:42:04 -0800
commit: 240dec52b2e6502e7deb27a3535af3b1a3e23428 (patch)
tree: ead5a550c7988ac3291452e524296634423f3012
parent: a94fc11866adb1b9ca4a4e1afb4fb574ed472e07 (diff)
download: pdfium-240dec52b2e6502e7deb27a3535af3b1a3e23428.tar.xz
Reland of Unify some code
Unify some code: move parsing of the linearized header into a separate CPDF_Linearized class. Original review: https://codereview.chromium.org/2466023002/ Revert review: https://codereview.chromium.org/2474283005/ The revert reason was: breaking the Chrome roll. See https://build.chromium.org/p/tryserver.chromium.linux/builders/linux_chromium_rel_ng/builds/331856 ___ This reland adds a fix for the fuzzers. Review-Url: https://codereview.chromium.org/2477213003
-rw-r--r-- BUILD.gn 2
-rw-r--r-- core/fpdfapi/parser/cpdf_data_avail.cpp 147
-rw-r--r-- core/fpdfapi/parser/cpdf_data_avail.h 5
-rw-r--r-- core/fpdfapi/parser/cpdf_document.cpp 21
-rw-r--r-- core/fpdfapi/parser/cpdf_document.h 3
-rw-r--r-- core/fpdfapi/parser/cpdf_document_unittest.cpp 15
-rw-r--r-- core/fpdfapi/parser/cpdf_hint_tables.cpp 32
-rw-r--r-- core/fpdfapi/parser/cpdf_hint_tables.h 9
-rw-r--r-- core/fpdfapi/parser/cpdf_linearized.cpp 71
-rw-r--r-- core/fpdfapi/parser/cpdf_linearized.h 55
-rw-r--r-- core/fpdfapi/parser/cpdf_parser.cpp 46
-rw-r--r-- core/fpdfapi/parser/cpdf_parser.h 6
-rw-r--r-- testing/libfuzzer/pdf_hint_table_fuzzer.cc 83
13 files changed, 251 insertions, 244 deletions
diff --git a/BUILD.gn b/BUILD.gn
index a54f6e4018..0cf5f54c77 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -500,6 +500,8 @@ static_library("fpdfapi") {
"core/fpdfapi/parser/cpdf_hint_tables.h",
"core/fpdfapi/parser/cpdf_indirect_object_holder.cpp",
"core/fpdfapi/parser/cpdf_indirect_object_holder.h",
+ "core/fpdfapi/parser/cpdf_linearized.cpp",
+ "core/fpdfapi/parser/cpdf_linearized.h",
"core/fpdfapi/parser/cpdf_name.cpp",
"core/fpdfapi/parser/cpdf_name.h",
"core/fpdfapi/parser/cpdf_null.cpp",
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp
index c4ed95e17f..e6d2c61ad3 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.cpp
+++ b/core/fpdfapi/parser/cpdf_data_avail.cpp
@@ -15,6 +15,7 @@
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_hint_tables.h"
+#include "core/fpdfapi/parser/cpdf_linearized.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
@@ -43,7 +44,6 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail,
m_dwCurrentOffset = 0;
m_dwXRefOffset = 0;
m_bufferOffset = 0;
- m_dwFirstPageNo = 0;
m_bufferSize = 0;
m_PagesObjNum = 0;
m_dwCurrentXRefSteam = 0;
@@ -56,7 +56,6 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail,
m_bDocAvail = false;
m_bMainXRefLoadTried = false;
m_bDocAvail = false;
- m_bLinearized = false;
m_bPagesLoad = false;
m_bPagesTreeLoad = false;
m_bMainXRefLoadedOK = false;
@@ -66,7 +65,6 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail,
m_bPageLoadedOK = false;
m_bNeedDownLoadResource = false;
m_bLinearizedFormParamLoad = false;
- m_pLinearized = nullptr;
m_pRoot = nullptr;
m_pTrailer = nullptr;
m_pCurrentParser = nullptr;
@@ -83,9 +81,6 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail,
CPDF_DataAvail::~CPDF_DataAvail() {
m_pHintTables.reset();
- delete m_pLinearized;
- delete m_pRoot;
- delete m_pTrailer;
for (CPDF_Object* pObject : m_arrayAcroforms)
delete pObject;
@@ -613,48 +608,27 @@ bool CPDF_DataAvail::CheckHeader(DownloadHints* pHints) {
}
bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) {
- CPDF_Dictionary* pDict = m_pLinearized->GetDict();
- CPDF_Object* pEndOffSet = pDict ? pDict->GetObjectFor("E") : nullptr;
- if (!pEndOffSet) {
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
-
- CPDF_Object* pXRefOffset = pDict ? pDict->GetObjectFor("T") : nullptr;
- if (!pXRefOffset) {
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
-
- CPDF_Object* pFileLen = pDict ? pDict->GetObjectFor("L") : nullptr;
- if (!pFileLen) {
+ if (!m_pLinearized->GetFirstPageEndOffset() ||
+ !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
bool bNeedDownLoad = false;
- if (pEndOffSet->IsNumber()) {
- uint32_t dwEnd = pEndOffSet->GetInteger();
- dwEnd += 512;
- if ((FX_FILESIZE)dwEnd > m_dwFileLen)
- dwEnd = (uint32_t)m_dwFileLen;
-
- int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
- int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
- if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
- pHints->AddSegment(iStartPos, iSize);
- bNeedDownLoad = true;
- }
- }
+ uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
+ dwEnd += 512;
+ if ((FX_FILESIZE)dwEnd > m_dwFileLen)
+ dwEnd = (uint32_t)m_dwFileLen;
- m_dwLastXRefOffset = 0;
- FX_FILESIZE dwFileLen = 0;
- if (pXRefOffset->IsNumber())
- m_dwLastXRefOffset = pXRefOffset->GetInteger();
-
- if (pFileLen->IsNumber())
- dwFileLen = pFileLen->GetInteger();
+ int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
+ int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
+ if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
+ pHints->AddSegment(iStartPos, iSize);
+ bNeedDownLoad = true;
+ }
+ m_dwLastXRefOffset = m_pLinearized->GetLastXRefOffset();
+ FX_FILESIZE dwFileLen = m_pLinearized->GetFileSize();
if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset,
(uint32_t)(dwFileLen - m_dwLastXRefOffset))) {
if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) {
@@ -702,52 +676,17 @@ bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset,
}
bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) {
- CPDF_Dictionary* pDict = m_pLinearized->GetDict();
- if (!pDict) {
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
-
- // The actual value is not required here, but validate its existence and type.
- CPDF_Number* pFirstPage = ToNumber(pDict->GetDirectObjectFor("O"));
- if (!pFirstPage || !pFirstPage->IsInteger()) {
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
-
- CPDF_Number* pPageCount = ToNumber(pDict->GetDirectObjectFor("N"));
- if (!pPageCount || !pPageCount->IsInteger()) {
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
-
- int nPageCount = pPageCount->GetInteger();
- if (nPageCount <= 1) {
+ if (m_pLinearized->GetPageCount() <= 1) {
m_docStatus = PDF_DATAAVAIL_DONE;
return true;
}
-
- CPDF_Array* pHintStreamRange = pDict->GetArrayFor("H");
- size_t nHintStreamSize = pHintStreamRange ? pHintStreamRange->GetCount() : 0;
- if (nHintStreamSize != 2 && nHintStreamSize != 4) {
+ if (!m_pLinearized->HasHintTable()) {
m_docStatus = PDF_DATAAVAIL_ERROR;
return false;
}
- for (const CPDF_Object* pArrayObject : *pHintStreamRange) {
- const CPDF_Number* pNumber = ToNumber(pArrayObject->GetDirect());
- if (!pNumber || !pNumber->IsInteger()) {
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
- }
-
- FX_FILESIZE szHintStart = pHintStreamRange->GetIntegerAt(0);
- FX_FILESIZE szHintLength = pHintStreamRange->GetIntegerAt(1);
- if (szHintStart < 0 || szHintLength <= 0) {
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
+ FX_FILESIZE szHintStart = m_pLinearized->GetHintStart();
+ FX_FILESIZE szHintLength = m_pLinearized->GetHintLength();
if (!IsDataAvail(szHintStart, szHintLength, pHints))
return false;
@@ -755,7 +694,7 @@ bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) {
m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset);
std::unique_ptr<CPDF_HintTables> pHintTables(
- new CPDF_HintTables(this, pDict));
+ new CPDF_HintTables(this, m_pLinearized.get()));
std::unique_ptr<CPDF_Object> pHintStream(
ParseIndirectObjectAt(szHintStart, 0));
CPDF_Stream* pStream = ToStream(pHintStream.get());
@@ -819,12 +758,12 @@ CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
}
bool CPDF_DataAvail::IsLinearized() {
- return m_bLinearized;
+ return !!m_pLinearized;
}
bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) {
if (m_pLinearized)
- return m_bLinearized;
+ return true;
ScopedFileStream file(FX_CreateMemoryStream(pData, (size_t)dwLen, false));
@@ -844,27 +783,13 @@ bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) {
return false;
uint32_t objnum = FXSYS_atoui(wordObjNum.c_str());
- m_pLinearized =
- ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum);
- if (!m_pLinearized)
- return false;
-
- CPDF_Dictionary* pDict = m_pLinearized->GetDict();
- if (!pDict || !pDict->GetObjectFor("Linearized"))
+ m_pLinearized = CPDF_Linearized::CreateForObject(pdfium::WrapUnique(
+ ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum)));
+ if (!m_pLinearized ||
+ m_pLinearized->GetFileSize() != m_pFileRead->GetSize()) {
+ m_pLinearized.reset();
return false;
-
- CPDF_Object* pLen = pDict->GetObjectFor("L");
- if (!pLen)
- return false;
-
- if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize())
- return false;
-
- m_bLinearized = true;
-
- if (CPDF_Number* pNo = ToNumber(pDict->GetObjectFor("P")))
- m_dwFirstPageNo = pNo->GetInteger();
-
+ }
return true;
}
@@ -1600,8 +1525,8 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
if (pdfium::ContainsKey(m_pagesLoadState, dwPage))
return DataAvailable;
- if (m_bLinearized) {
- if (dwPage == m_dwFirstPageNo) {
+ if (m_pLinearized) {
+ if (dwPage == m_pLinearized->GetFirstPageNo()) {
DocAvailStatus nRet = CheckLinearizedFirstPage(dwPage, pHints);
if (nRet == DataAvailable)
m_pagesLoadState.insert(dwPage);
@@ -1731,11 +1656,8 @@ void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos,
}
int CPDF_DataAvail::GetPageCount() const {
- if (m_pLinearized) {
- CPDF_Dictionary* pDict = m_pLinearized->GetDict();
- CPDF_Object* pObj = pDict ? pDict->GetDirectObjectFor("N") : nullptr;
- return pObj ? pObj->GetInteger() : 0;
- }
+ if (m_pLinearized)
+ return m_pLinearized->GetPageCount();
return m_pDocument ? m_pDocument->GetPageCount() : 0;
}
@@ -1748,10 +1670,7 @@ CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
if (!m_pLinearized || !m_pHintTables)
return nullptr;
- CPDF_Dictionary* pDict = m_pLinearized->GetDict();
- CPDF_Object* pObj = pDict ? pDict->GetDirectObjectFor("P") : nullptr;
- int firstPageNum = pObj ? pObj->GetInteger() : 0;
- if (index == firstPageNum)
+ if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
return nullptr;
FX_FILESIZE szPageStartPos = 0;
FX_FILESIZE szPageLength = 0;
diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h
index 0a8716a498..bff9f2943b 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.h
+++ b/core/fpdfapi/parser/cpdf_data_avail.h
@@ -16,6 +16,7 @@
class CPDF_Dictionary;
class CPDF_HintTables;
class CPDF_IndirectObjectHolder;
+class CPDF_Linearized;
class CPDF_Parser;
enum PDF_DATAAVAIL_STATUS {
@@ -199,7 +200,7 @@ class CPDF_DataAvail final {
CPDF_Object* m_pRoot;
uint32_t m_dwRootObjNum;
uint32_t m_dwInfoObjNum;
- CPDF_Object* m_pLinearized;
+ std::unique_ptr<CPDF_Linearized> m_pLinearized;
CPDF_Object* m_pTrailer;
bool m_bDocAvail;
FX_FILESIZE m_dwHeaderOffset;
@@ -220,8 +221,6 @@ class CPDF_DataAvail final {
CFX_ArrayTemplate<uint32_t> m_XRefStreamList;
CFX_ArrayTemplate<uint32_t> m_PageObjList;
uint32_t m_PagesObjNum;
- bool m_bLinearized;
- uint32_t m_dwFirstPageNo;
bool m_bLinearedDataOK;
bool m_bMainXRefLoadTried;
bool m_bMainXRefLoadedOK;
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index 8e181de97c..d9ffc0b28a 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -17,6 +17,7 @@
#include "core/fpdfapi/page/pageint.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_linearized.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_parser.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
@@ -378,23 +379,13 @@ void CPDF_Document::LoadDoc() {
m_PageList.SetSize(RetrievePageCount());
}
-void CPDF_Document::LoadLinearizedDoc(CPDF_Dictionary* pLinearizationParams) {
+void CPDF_Document::LoadLinearizedDoc(
+ const CPDF_Linearized* pLinearizationParams) {
m_bLinearized = true;
LoadDocInternal();
-
- uint32_t dwPageCount = 0;
- CPDF_Object* pCount = pLinearizationParams->GetObjectFor("N");
- if (ToNumber(pCount))
- dwPageCount = pCount->GetInteger();
- m_PageList.SetSize(dwPageCount);
-
- CPDF_Object* pNo = pLinearizationParams->GetObjectFor("P");
- if (ToNumber(pNo))
- m_iFirstPageNo = pNo->GetInteger();
-
- CPDF_Object* pObjNum = pLinearizationParams->GetObjectFor("O");
- if (ToNumber(pObjNum))
- m_dwFirstPageObjNum = pObjNum->GetInteger();
+ m_PageList.SetSize(pLinearizationParams->GetPageCount());
+ m_iFirstPageNo = pLinearizationParams->GetFirstPageNo();
+ m_dwFirstPageObjNum = pLinearizationParams->GetFirstPageObjNum();
}
void CPDF_Document::LoadPages() {
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index 0a99e42c3f..1b18015200 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -26,6 +26,7 @@ class CPDF_Font;
class CPDF_FontEncoding;
class CPDF_IccProfile;
class CPDF_Image;
+class CPDF_Linearized;
class CPDF_Parser;
class CPDF_Pattern;
class CPDF_StreamAcc;
@@ -85,7 +86,7 @@ class CPDF_Document : public CPDF_IndirectObjectHolder {
CPDF_IccProfile* LoadIccProfile(CPDF_Stream* pStream);
void LoadDoc();
- void LoadLinearizedDoc(CPDF_Dictionary* pLinearizationParams);
+ void LoadLinearizedDoc(const CPDF_Linearized* pLinearizationParams);
void LoadPages();
void CreateNewDoc();
diff --git a/core/fpdfapi/parser/cpdf_document_unittest.cpp b/core/fpdfapi/parser/cpdf_document_unittest.cpp
index 16d0ade069..e20a5a1d1d 100644
--- a/core/fpdfapi/parser/cpdf_document_unittest.cpp
+++ b/core/fpdfapi/parser/cpdf_document_unittest.cpp
@@ -9,15 +9,13 @@
#include "core/fpdfapi/cpdf_modulemgr.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_linearized.h"
#include "core/fpdfapi/parser/cpdf_parser.h"
#include "core/fxcrt/fx_memory.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace {
-using ScopedDictionary =
- std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>>;
-
CPDF_Dictionary* CreatePageTreeNode(CPDF_Array* kids,
CPDF_Document* pDoc,
int count) {
@@ -77,6 +75,11 @@ class CPDF_TestDocumentForPages : public CPDF_Document {
private:
std::unique_ptr<CPDF_Dictionary> m_pOwnedRootDict;
};
+
+class TestLinearized : public CPDF_Linearized {
+ public:
+ explicit TestLinearized(CPDF_Dictionary* dict) : CPDF_Linearized(dict) {}
+};
} // namespace
class cpdf_document_test : public testing::Test {
@@ -142,10 +145,12 @@ TEST_F(cpdf_document_test, UseCachedPageObjNumIfHaveNotPagesDict) {
// can be not exists in this case.
// (case, when hint table is used to page check in CPDF_DataAvail).
CPDF_Document document(pdfium::MakeUnique<CPDF_Parser>());
- std::unique_ptr<CPDF_Dictionary> dict(new CPDF_Dictionary());
+ auto dict = pdfium::MakeUnique<CPDF_Dictionary>();
+ dict->SetBooleanFor("Linearized", true);
const int page_count = 100;
dict->SetIntegerFor("N", page_count);
- document.LoadLinearizedDoc(dict.get());
+ TestLinearized linearized(dict.get());
+ document.LoadLinearizedDoc(&linearized);
ASSERT_EQ(page_count, document.GetPageCount());
CPDF_Object* page_stub = new CPDF_Dictionary();
const uint32_t obj_num = document.AddIndirectObject(page_stub);
diff --git a/core/fpdfapi/parser/cpdf_hint_tables.cpp b/core/fpdfapi/parser/cpdf_hint_tables.cpp
index 045b94cac5..bbc57dace9 100644
--- a/core/fpdfapi/parser/cpdf_hint_tables.cpp
+++ b/core/fpdfapi/parser/cpdf_hint_tables.cpp
@@ -12,6 +12,7 @@
#include "core/fpdfapi/parser/cpdf_data_avail.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/cpdf_linearized.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_stream_acc.h"
#include "core/fxcrt/fx_safe_types.h"
@@ -34,12 +35,12 @@ bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
} // namespace
CPDF_HintTables::CPDF_HintTables(CPDF_DataAvail* pDataAvail,
- CPDF_Dictionary* pLinearized)
+ CPDF_Linearized* pLinearized)
: m_pDataAvail(pDataAvail),
- m_pLinearizedDict(pLinearized),
+ m_pLinearized(pLinearized),
m_nFirstPageSharedObjs(0),
m_szFirstPageObjOffset(0) {
- ASSERT(m_pLinearizedDict);
+ ASSERT(m_pLinearized);
}
CPDF_HintTables::~CPDF_HintTables() {}
@@ -487,38 +488,25 @@ bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
}
int CPDF_HintTables::GetEndOfFirstPageOffset() const {
- CPDF_Object* pOffsetE = m_pLinearizedDict->GetDirectObjectFor("E");
- return pOffsetE ? pOffsetE->GetInteger() : -1;
+ return static_cast<int>(m_pLinearized->GetFirstPageEndOffset());
}
int CPDF_HintTables::GetNumberOfPages() const {
- CPDF_Object* pPageNum = m_pLinearizedDict->GetDirectObjectFor("N");
- return pPageNum ? pPageNum->GetInteger() : 0;
+ return static_cast<int>(m_pLinearized->GetPageCount());
}
int CPDF_HintTables::GetFirstPageObjectNumber() const {
- CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetDirectObjectFor("O");
- return pFirstPageObj ? pFirstPageObj->GetInteger() : -1;
+ return static_cast<int>(m_pLinearized->GetFirstPageObjNum());
}
int CPDF_HintTables::GetFirstPageNumber() const {
- CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetDirectObjectFor("P");
- return pFirstPageNum ? pFirstPageNum->GetInteger() : 0;
+ return static_cast<int>(m_pLinearized->GetFirstPageNo());
}
int CPDF_HintTables::ReadPrimaryHintStreamOffset() const {
- return ReadPrimaryHintStream(0);
+ return static_cast<int>(m_pLinearized->GetHintStart());
}
int CPDF_HintTables::ReadPrimaryHintStreamLength() const {
- return ReadPrimaryHintStream(1);
-}
-
-int CPDF_HintTables::ReadPrimaryHintStream(int index) const {
- CPDF_Array* pRange = m_pLinearizedDict->GetArrayFor("H");
- if (!pRange)
- return -1;
-
- CPDF_Object* pStreamLen = pRange->GetDirectObjectAt(index);
- return pStreamLen ? pStreamLen->GetInteger() : -1;
+ return static_cast<int>(m_pLinearized->GetHintLength());
}
diff --git a/core/fpdfapi/parser/cpdf_hint_tables.h b/core/fpdfapi/parser/cpdf_hint_tables.h
index 84f48cf41f..68b5059b05 100644
--- a/core/fpdfapi/parser/cpdf_hint_tables.h
+++ b/core/fpdfapi/parser/cpdf_hint_tables.h
@@ -14,12 +14,12 @@
#include "core/fxcrt/fx_stream.h"
class CFX_BitStream;
-class CPDF_Dictionary;
+class CPDF_Linearized;
class CPDF_Stream;
class CPDF_HintTables {
public:
- CPDF_HintTables(CPDF_DataAvail* pDataAvail, CPDF_Dictionary* pLinearized);
+ CPDF_HintTables(CPDF_DataAvail* pDataAvail, CPDF_Linearized* pLinearized);
virtual ~CPDF_HintTables();
bool GetPagePos(uint32_t index,
@@ -46,9 +46,6 @@ class CPDF_HintTables {
virtual int ReadPrimaryHintStreamOffset() const;
virtual int ReadPrimaryHintStreamLength() const;
- // Helper for the ReadPrimaryHintStream methods above.
- int ReadPrimaryHintStream(int index) const;
-
uint32_t GetItemLength(uint32_t index,
const std::vector<FX_FILESIZE>& szArray);
@@ -56,7 +53,7 @@ class CPDF_HintTables {
CPDF_DataAvail* const m_pDataAvail;
// Owned by |m_pDataAvail|.
- CPDF_Dictionary* const m_pLinearizedDict;
+ CPDF_Linearized* const m_pLinearized;
uint32_t m_nFirstPageSharedObjs;
FX_FILESIZE m_szFirstPageObjOffset;
diff --git a/core/fpdfapi/parser/cpdf_linearized.cpp b/core/fpdfapi/parser/cpdf_linearized.cpp
new file mode 100644
index 0000000000..ec57f100cb
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_linearized.cpp
@@ -0,0 +1,71 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fpdfapi/parser/cpdf_linearized.h"
+
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_number.h"
+#include "third_party/base/ptr_util.h"
+
+namespace {
+
+template <class T>
+bool IsValidNumericDictionaryValue(const CPDF_Dictionary* pDict,
+ const char* key,
+ T min_value,
+ bool must_exist = true) {
+ if (!pDict->KeyExist(key))
+ return !must_exist;
+ const CPDF_Number* pNum = ToNumber(pDict->GetObjectFor(key));
+ if (!pNum || !pNum->IsInteger())
+ return false;
+ const int raw_value = pNum->GetInteger();
+ if (!pdfium::base::IsValueInRangeForNumericType<T>(raw_value))
+ return false;
+ return static_cast<T>(raw_value) >= min_value;
+}
+
+} // namespace
+
+// static
+std::unique_ptr<CPDF_Linearized> CPDF_Linearized::CreateForObject(
+ std::unique_ptr<CPDF_Object> pObj) {
+ auto pDict = ToDictionary(std::move(pObj));
+ if (!pDict || !pDict->KeyExist("Linearized") ||
+ !IsValidNumericDictionaryValue<FX_FILESIZE>(pDict.get(), "L", 1) ||
+ !IsValidNumericDictionaryValue<uint32_t>(pDict.get(), "P", 0, false) ||
+ !IsValidNumericDictionaryValue<FX_FILESIZE>(pDict.get(), "T", 1) ||
+ !IsValidNumericDictionaryValue<uint32_t>(pDict.get(), "N", 0) ||
+ !IsValidNumericDictionaryValue<FX_FILESIZE>(pDict.get(), "E", 1) ||
+ !IsValidNumericDictionaryValue<uint32_t>(pDict.get(), "O", 1))
+ return nullptr;
+ return pdfium::WrapUnique(new CPDF_Linearized(pDict.get()));
+}
+
+CPDF_Linearized::CPDF_Linearized(const CPDF_Dictionary* pDict) {
+ if (!pDict)
+ return;
+ m_szFileSize = pDict->GetIntegerFor("L");
+ m_dwFirstPageNo = pDict->GetIntegerFor("P");
+ m_szLastXRefOffset = pDict->GetIntegerFor("T");
+ m_PageCount = pDict->GetIntegerFor("N");
+ m_szFirstPageEndOffset = pDict->GetIntegerFor("E");
+ m_FirstPageObjNum = pDict->GetIntegerFor("O");
+ const CPDF_Array* pHintStreamRange = pDict->GetArrayFor("H");
+ const size_t nHintStreamSize =
+ pHintStreamRange ? pHintStreamRange->GetCount() : 0;
+ if (nHintStreamSize == 2 || nHintStreamSize == 4) {
+ m_szHintStart = std::max(pHintStreamRange->GetIntegerAt(0), 0);
+ m_szHintLength = std::max(pHintStreamRange->GetIntegerAt(1), 0);
+ }
+}
+
+CPDF_Linearized::~CPDF_Linearized() {}
+
+bool CPDF_Linearized::HasHintTable() const {
+ return GetPageCount() > 1 && GetHintStart() > 0 && GetHintLength() > 0;
+}
diff --git a/core/fpdfapi/parser/cpdf_linearized.h b/core/fpdfapi/parser/cpdf_linearized.h
new file mode 100644
index 0000000000..caf538e3fe
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_linearized.h
@@ -0,0 +1,55 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FPDFAPI_PARSER_CPDF_LINEARIZED_H_
+#define CORE_FPDFAPI_PARSER_CPDF_LINEARIZED_H_
+
+#include <memory>
+
+#include "core/fxcrt/fx_memory.h"
+#include "core/fxcrt/fx_stream.h"
+
+class CPDF_Dictionary;
+class CPDF_Object;
+
+class CPDF_Linearized {
+ public:
+ ~CPDF_Linearized();
+ static std::unique_ptr<CPDF_Linearized> CreateForObject(
+ std::unique_ptr<CPDF_Object> pObj);
+
+ // Will only return values > 0.
+ FX_FILESIZE GetFileSize() const { return m_szFileSize; }
+ uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; }
+ // Will only return values > 0.
+ FX_FILESIZE GetLastXRefOffset() const { return m_szLastXRefOffset; }
+ uint32_t GetPageCount() const { return m_PageCount; }
+ // Will only return values > 0.
+ FX_FILESIZE GetFirstPageEndOffset() const { return m_szFirstPageEndOffset; }
+ // Will only return values > 0.
+ uint32_t GetFirstPageObjNum() const { return m_FirstPageObjNum; }
+
+ bool HasHintTable() const;
+ // Never negative; 0 when no usable /H hint stream range was parsed.
+ FX_FILESIZE GetHintStart() const { return m_szHintStart; }
+ // Never negative; 0 when no usable /H hint stream range was parsed.
+ FX_FILESIZE GetHintLength() const { return m_szHintLength; }
+
+ protected:
+ explicit CPDF_Linearized(const CPDF_Dictionary* pDict);
+
+ private:
+ FX_FILESIZE m_szFileSize = 0;
+ uint32_t m_dwFirstPageNo = 0;
+ FX_FILESIZE m_szLastXRefOffset = 0;
+ uint32_t m_PageCount = 0;
+ FX_FILESIZE m_szFirstPageEndOffset = 0;
+ uint32_t m_FirstPageObjNum = 0;
+ FX_FILESIZE m_szHintStart = 0;
+ FX_FILESIZE m_szHintLength = 0;
+};
+
+#endif // CORE_FPDFAPI_PARSER_CPDF_LINEARIZED_H_
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index c8c07bd0b7..044d12d6ef 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -12,6 +12,7 @@
#include "core/fpdfapi/parser/cpdf_crypto_handler.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/cpdf_linearized.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_security_handler.h"
@@ -54,8 +55,6 @@ CPDF_Parser::CPDF_Parser()
m_pTrailer(nullptr),
m_pEncryptDict(nullptr),
m_bVersionUpdated(false),
- m_pLinearized(nullptr),
- m_dwFirstPageNo(0),
m_dwXrefStartObjNum(0) {
m_pSyntax.reset(new CPDF_SyntaxParser);
}
@@ -72,8 +71,6 @@ CPDF_Parser::~CPDF_Parser() {
for (CPDF_Dictionary* trailer : m_Trailers)
delete trailer;
-
- delete m_pLinearized;
}
uint32_t CPDF_Parser::GetLastObjNum() const {
@@ -1402,6 +1399,10 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
return pObj;
}
+uint32_t CPDF_Parser::GetFirstPageNo() const {
+ return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
+}
+
CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
if (m_pSyntax->GetKeyword() != "trailer")
return nullptr;
@@ -1448,35 +1449,14 @@ bool CPDF_Parser::IsLinearizedFile(IFX_SeekableReadStream* pFileAccess,
return false;
}
- m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true);
+ m_pLinearized = CPDF_Linearized::CreateForObject(
+ pdfium::WrapUnique(m_pSyntax->GetObject(nullptr, objnum, gennum, true)));
if (!m_pLinearized)
return false;
-
- CPDF_Dictionary* pDict = m_pLinearized->GetDict();
- if (pDict && pDict->GetObjectFor("Linearized")) {
- m_pSyntax->GetNextWord(nullptr);
-
- CPDF_Object* pLen = pDict->GetObjectFor("L");
- if (!pLen) {
- delete m_pLinearized;
- m_pLinearized = nullptr;
- return false;
- }
-
- if (pLen->GetInteger() != (int)pFileAccess->GetSize())
- return false;
-
- if (CPDF_Number* pNo = ToNumber(pDict->GetObjectFor("P")))
- m_dwFirstPageNo = pNo->GetInteger();
-
- if (CPDF_Number* pTable = ToNumber(pDict->GetObjectFor("T")))
- m_LastXRefOffset = pTable->GetInteger();
-
- return true;
- }
- delete m_pLinearized;
- m_pLinearized = nullptr;
- return false;
+ m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
+ // Move parser onto first page xref table start.
+ m_pSyntax->GetNextWord(nullptr);
+ return true;
}
CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
@@ -1525,7 +1505,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
if (eRet != SUCCESS)
return eRet;
- m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict());
+ m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
if (bXRefRebuilt)
return FORMAT_ERROR;
@@ -1538,7 +1518,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
if (eRet != SUCCESS)
return eRet;
- m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict());
+ m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
if (!m_pDocument->GetRoot())
return FORMAT_ERROR;
}
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 3be157a1a2..b8ce7ab294 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -19,6 +19,7 @@ class CPDF_CryptoHandler;
class CPDF_Dictionary;
class CPDF_Document;
class CPDF_IndirectObjectHolder;
+class CPDF_Linearized;
class CPDF_Object;
class CPDF_SecurityHandler;
class CPDF_StreamAcc;
@@ -88,7 +89,7 @@ class CPDF_Parser {
uint32_t objnum,
FX_FILESIZE* pResultPos);
- uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; }
+ uint32_t GetFirstPageNo() const;
protected:
struct ObjectInfo {
@@ -157,8 +158,7 @@ class CPDF_Parser {
std::set<FX_FILESIZE> m_SortedOffset;
std::vector<CPDF_Dictionary*> m_Trailers;
bool m_bVersionUpdated;
- CPDF_Object* m_pLinearized;
- uint32_t m_dwFirstPageNo;
+ std::unique_ptr<CPDF_Linearized> m_pLinearized;
uint32_t m_dwXrefStartObjNum;
// A map of object numbers to indirect streams. Map owns the streams.
diff --git a/testing/libfuzzer/pdf_hint_table_fuzzer.cc b/testing/libfuzzer/pdf_hint_table_fuzzer.cc
index b01c87216b..ec51517fc0 100644
--- a/testing/libfuzzer/pdf_hint_table_fuzzer.cc
+++ b/testing/libfuzzer/pdf_hint_table_fuzzer.cc
@@ -4,18 +4,11 @@
#include <cstdint>
+#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_hint_tables.h"
-
-struct DummyLinearizedDictionary {
- int end_of_first_page_offset;
- int number_of_pages;
- int first_page_object_number;
- int first_page_number;
- int primary_hint_stream_offset;
- int primary_hint_stream_length;
- int shared_hint_table_offset;
-};
+#include "core/fpdfapi/parser/cpdf_linearized.h"
+#include "third_party/base/ptr_util.h"
int32_t GetData(const int32_t** data32, const uint8_t** data, size_t* size) {
const int32_t* ret = *data32;
@@ -27,64 +20,70 @@ int32_t GetData(const int32_t** data32, const uint8_t** data, size_t* size) {
class HintTableForFuzzing : public CPDF_HintTables {
public:
- HintTableForFuzzing(DummyLinearizedDictionary* dict,
- CPDF_Dictionary* linearized_dict)
- : CPDF_HintTables(nullptr, linearized_dict), dict_(dict) {}
+ HintTableForFuzzing(CPDF_Linearized* pLinearized,
+ int shared_hint_table_offset)
+ : CPDF_HintTables(nullptr, pLinearized),
+ shared_hint_table_offset_(shared_hint_table_offset) {}
~HintTableForFuzzing() {}
void Fuzz(const uint8_t* data, size_t size) {
- if (dict_->shared_hint_table_offset <= 0)
+ if (shared_hint_table_offset_ <= 0)
return;
- if (size < static_cast<size_t>(dict_->shared_hint_table_offset))
+ if (size < static_cast<size_t>(shared_hint_table_offset_))
return;
CFX_BitStream bs;
bs.Init(data, size);
if (!ReadPageHintTable(&bs))
return;
- ReadSharedObjHintTable(&bs, dict_->shared_hint_table_offset);
+ ReadSharedObjHintTable(&bs, shared_hint_table_offset_);
}
private:
- int GetEndOfFirstPageOffset() const override {
- return dict_->end_of_first_page_offset;
- }
- int GetNumberOfPages() const override { return dict_->number_of_pages; }
- int GetFirstPageObjectNumber() const override {
- return dict_->first_page_object_number;
- }
- int GetFirstPageNumber() const override { return dict_->first_page_number; }
- int ReadPrimaryHintStreamOffset() const override {
- return dict_->primary_hint_stream_offset;
- }
- int ReadPrimaryHintStreamLength() const override {
- return dict_->primary_hint_stream_length;
- }
+ int shared_hint_table_offset_;
+};
- DummyLinearizedDictionary* const dict_;
+class FakeLinearized : public CPDF_Linearized {
+ public:
+ explicit FakeLinearized(CPDF_Dictionary* linearized_dict)
+ : CPDF_Linearized(linearized_dict) {}
};
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
- // Need 28 bytes for |dummy_dict|.
+ // Need 28 bytes for |linearized_dict|.
// The header section of page offset hint table is 36 bytes.
// The header section of shared object hint table is 24 bytes.
if (size < 28 + 36 + 24)
return 0;
const int32_t* data32 = reinterpret_cast<const int32_t*>(data);
- DummyLinearizedDictionary dummy_dict;
- dummy_dict.end_of_first_page_offset = GetData(&data32, &data, &size);
- dummy_dict.number_of_pages = GetData(&data32, &data, &size);
- dummy_dict.first_page_object_number = GetData(&data32, &data, &size);
- dummy_dict.first_page_number = GetData(&data32, &data, &size);
- dummy_dict.primary_hint_stream_offset = GetData(&data32, &data, &size);
- dummy_dict.primary_hint_stream_length = GetData(&data32, &data, &size);
- dummy_dict.shared_hint_table_offset = GetData(&data32, &data, &size);
- std::unique_ptr<CPDF_Dictionary> dummy_linearized_dict(new CPDF_Dictionary);
+ auto linearized_dict = pdfium::MakeUnique<CPDF_Dictionary>();
+ // Set initial value.
+ linearized_dict->SetBooleanFor("Linearized", true);
+ // Set first page end offset
+ linearized_dict->SetIntegerFor("E", GetData(&data32, &data, &size));
+ // Set page count
+ linearized_dict->SetIntegerFor("N", GetData(&data32, &data, &size));
+ // Set first page obj num
+ linearized_dict->SetIntegerFor("O", GetData(&data32, &data, &size));
+ // Set first page no
+ linearized_dict->SetIntegerFor("P", GetData(&data32, &data, &size));
+
+ auto hint_info = pdfium::MakeUnique<CPDF_Array>();
+ // Add primary hint stream offset
+ hint_info->AddInteger(GetData(&data32, &data, &size));
+ // Add primary hint stream size
+ hint_info->AddInteger(GetData(&data32, &data, &size));
+ // Set hint stream info.
+ linearized_dict->SetFor("H", hint_info.release());
+
+ const int shared_hint_table_offset = GetData(&data32, &data, &size);
+
{
- HintTableForFuzzing hint_table(&dummy_dict, dummy_linearized_dict.get());
+ FakeLinearized linearized(linearized_dict.get());
+ HintTableForFuzzing hint_table(&linearized, shared_hint_table_offset);
hint_table.Fuzz(data, size);
}
return 0;