diff options
author | Jun Fang <jun_fang@foxitsoftware.com> | 2015-11-02 13:45:35 +0800 |
---|---|---|
committer | Jun Fang <jun_fang@foxitsoftware.com> | 2015-11-02 13:45:35 +0800 |
commit | d946f3011984755b14d7dcfb05d572e870f93f3f (patch) | |
tree | 86fd77c65b920bf3e606c9048761a8e73ec0c6a2 | |
parent | c88c42f317c0e94c4c7b98949bfe1a495aef07a9 (diff) | |
download | pdfium-d946f3011984755b14d7dcfb05d572e870f93f3f.tar.xz |
Support linearized loading
BUG=446715
R=tsepez@chromium.org
Review URL: https://codereview.chromium.org/1353093003 .
-rw-r--r-- | BUILD.gn | 1 | ||||
-rw-r--r-- | core/include/fpdfapi/fpdf_parser.h | 49 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 646 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_embeddertest.cpp | 8 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/parser_int.h | 49 | ||||
-rw-r--r-- | fpdfsdk/src/fpdf_dataavail.cpp | 22 | ||||
-rw-r--r-- | pdfium.gyp | 1 | ||||
-rw-r--r-- | public/fpdf_dataavail.h | 86 | ||||
-rw-r--r-- | samples/pdfium_test.cc | 174 | ||||
-rw-r--r-- | testing/embedder_test.cpp | 46 | ||||
-rw-r--r-- | testing/embedder_test.h | 3 | ||||
-rw-r--r-- | testing/resources/feature_linearized_loading.pdf | bin | 0 -> 11671 bytes |
12 files changed, 812 insertions, 273 deletions
@@ -321,6 +321,7 @@ static_library("fpdfapi") { "core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp", "core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp", "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp", + "core/src/fpdfapi/fpdf_parser/parser_int.h", "core/src/fpdfapi/fpdf_render/fpdf_render.cpp", "core/src/fpdfapi/fpdf_render/fpdf_render_cache.cpp", "core/src/fpdfapi/fpdf_render/fpdf_render_image.cpp", diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h index 32509eb837..b1334e4c7a 100644 --- a/core/include/fpdfapi/fpdf_parser.h +++ b/core/include/fpdfapi/fpdf_parser.h @@ -7,34 +7,37 @@ #ifndef CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ #define CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ +#include "../../../public/fpdf_dataavail.h" #include "../../../third_party/base/nonstd_unique_ptr.h" #include "../fxcrt/fx_system.h" #include "fpdf_objects.h" -class CPDF_Document; -class CPDF_Parser; -class CPDF_SecurityHandler; -class CPDF_StandardSecurityHandler; -class CPDF_CryptoHandler; -class CPDF_Object; -class IFX_FileRead; class CFDF_Document; class CFDF_Parser; -class CFX_Font; class CFX_AffineMatrix; +class CFX_DIBSource; class CFX_FloatRect; -class CPDF_Point; +class CFX_Font; +class CFX_PrivateData; +class CPDF_ColorSpace; +class CPDF_CryptoHandler; class CPDF_DocPageData; class CPDF_DocRenderData; -class CPDF_ModuleMgr; -class CFX_DIBSource; +class CPDF_Document; class CPDF_Font; -class CPDF_Image; -class CPDF_ColorSpace; -class CPDF_Pattern; class CPDF_FontEncoding; +class CPDF_HintTables; class CPDF_IccProfile; -class CFX_PrivateData; +class CPDF_Image; +class CPDF_ModuleMgr; +class CPDF_Object; +class CPDF_Parser; +class CPDF_Pattern; +class CPDF_Point; +class CPDF_SecurityHandler; +class CPDF_StandardSecurityHandler; +class IFX_FileRead; + #define FPDFPERM_PRINT 0x0004 #define FPDFPERM_MODIFY 0x0008 #define FPDFPERM_EXTRACT 0x0010 @@ -863,12 +866,7 @@ class IFX_DownloadHints { virtual ~IFX_DownloadHints() {} virtual void AddSegment(FX_FILESIZE offset, FX_DWORD size) = 0; }; -#define PDF_IS_LINEARIZED 1 -#define PDF_NOT_LINEARIZED 0 -#define PDF_UNKNOW_LINEARIZED -1 -#define PDFFORM_NOTAVAIL 0 -#define PDFFORM_AVAIL 1 -#define PDFFORM_NOTEXIST 2 + class IPDF_DataAvail { public: static IPDF_DataAvail* Create(IFX_FileAvail* pFileAvail, @@ -878,12 +876,12 @@ class IPDF_DataAvail { IFX_FileAvail* GetFileAvail() const { return m_pFileAvail; } IFX_FileRead* GetFileRead() const { return m_pFileRead; } - virtual FX_BOOL IsDocAvail(IFX_DownloadHints* pHints) = 0; + virtual int IsDocAvail(IFX_DownloadHints* pHints) = 0; virtual void SetDocument(CPDF_Document* pDoc) = 0; - virtual FX_BOOL IsPageAvail(int iPage, IFX_DownloadHints* pHints) = 0; + virtual int IsPageAvail(int iPage, IFX_DownloadHints* pHints) = 0; virtual FX_BOOL IsLinearized() = 0; - virtual int32_t IsFormAvail(IFX_DownloadHints* pHints) = 0; - virtual int32_t IsLinearizedPDF() = 0; + virtual int IsFormAvail(IFX_DownloadHints* pHints) = 0; + virtual int IsLinearizedPDF() = 0; virtual void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, FX_DWORD* pSize) = 0; @@ -925,6 +923,7 @@ enum PDF_DATAAVAIL_STATUS { PDF_DATAAVAIL_HEADER = 0, PDF_DATAAVAIL_FIRSTPAGE, PDF_DATAAVAIL_FIRSTPAGE_PREPARE, + PDF_DATAAVAIL_HINTTABLE, PDF_DATAAVAIL_END, PDF_DATAAVAIL_CROSSREF, PDF_DATAAVAIL_CROSSREF_ITEM, diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index 4ce196e90d..93212e77c1 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -15,6 +15,7 @@ #include "../../../include/fpdfapi/fpdf_parser.h" #include "../../../include/fxcrt/fx_safe_types.h" #include "../fpdf_page/pageint.h" +#include "parser_int.h" namespace { @@ -2679,22 +2680,28 @@ void CPDF_SyntaxParser::GetBinary(uint8_t* buffer, FX_DWORD size) { class CPDF_DataAvail final : public IPDF_DataAvail { public: - CPDF_DataAvail(IFX_FileAvail* pFileAvail, IFX_FileRead* pFileRead); + CPDF_DataAvail(IFX_FileAvail* pFileAvail, + IFX_FileRead* pFileRead, + FX_BOOL bSupportHintTable); ~CPDF_DataAvail() override; - FX_BOOL IsDocAvail(IFX_DownloadHints* pHints) override; + int IsDocAvail(IFX_DownloadHints* pHints) override; void SetDocument(CPDF_Document* pDoc) override; - FX_BOOL IsPageAvail(int iPage, IFX_DownloadHints* pHints) override; + int IsPageAvail(int iPage, IFX_DownloadHints* pHints) override; - int32_t IsFormAvail(IFX_DownloadHints* pHints) override; + int IsFormAvail(IFX_DownloadHints* pHints) override; - int32_t IsLinearizedPDF() override; + int IsLinearizedPDF() override; FX_BOOL IsLinearized() override { return m_bLinearized; } void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, FX_DWORD* pSize) override; + int GetPageCount() const; + CPDF_Dictionary* GetPage(int index); + + friend class CPDF_HintTables; protected: static const int kMaxDataAvailRecursionDepth = 64; @@ -2709,6 +2716,7 @@ class CPDF_DataAvail final : public IPDF_DataAvail { FX_BOOL CheckDocStatus(IFX_DownloadHints* pHints); FX_BOOL CheckHeader(IFX_DownloadHints* pHints); FX_BOOL CheckFirstPage(IFX_DownloadHints* pHints); + FX_BOOL CheckHintTables(IFX_DownloadHints* pHints); FX_BOOL CheckEnd(IFX_DownloadHints* pHints); FX_BOOL CheckCrossRef(IFX_DownloadHints* pHints); FX_BOOL CheckCrossRefItem(IFX_DownloadHints* pHints); @@ -2731,7 +2739,9 @@ class CPDF_DataAvail final : public IPDF_DataAvail { void SetStartOffset(FX_FILESIZE dwOffset); FX_BOOL GetNextToken(CFX_ByteString& token); FX_BOOL GetNextChar(uint8_t& ch); - CPDF_Object* ParseIndirectObjectAt(FX_FILESIZE pos, FX_DWORD objnum); + CPDF_Object* ParseIndirectObjectAt(FX_FILESIZE pos, + FX_DWORD objnum, + CPDF_IndirectObjects* pObjList = NULL); CPDF_Object* GetObject(FX_DWORD objnum, IFX_DownloadHints* pHints, FX_BOOL* pExistInFile); @@ -2740,7 +2750,7 @@ class CPDF_DataAvail final : public IPDF_DataAvail { FX_BOOL LoadPages(IFX_DownloadHints* pHints); FX_BOOL LoadAllXref(IFX_DownloadHints* pHints); FX_BOOL LoadAllFile(IFX_DownloadHints* pHints); - FX_BOOL CheckLinearizedData(IFX_DownloadHints* pHints); + int32_t CheckLinearizedData(IFX_DownloadHints* pHints); FX_BOOL CheckFileResources(IFX_DownloadHints* pHints); FX_BOOL CheckPageAnnots(int iPage, IFX_DownloadHints* pHints); @@ -2763,6 +2773,9 @@ class CPDF_DataAvail final : public IPDF_DataAvail { FX_BOOL CheckPageCount(IFX_DownloadHints* pHints); FX_BOOL IsFirstCheck(int iPage); void ResetFirstCheck(int iPage); + FX_BOOL IsDataAvail(FX_FILESIZE offset, + FX_DWORD size, + IFX_DownloadHints* pHints); CPDF_Parser m_parser; @@ -2876,6 +2889,9 @@ class CPDF_DataAvail final : public IPDF_DataAvail { std::set<FX_DWORD> m_pageMapCheckState; std::set<FX_DWORD> m_pagesLoadState; + + nonstd::unique_ptr<CPDF_HintTables> m_pHintTables; + FX_BOOL m_bSupportHintTable; }; IPDF_DataAvail::IPDF_DataAvail(IFX_FileAvail* pFileAvail, @@ -2885,14 +2901,15 @@ IPDF_DataAvail::IPDF_DataAvail(IFX_FileAvail* pFileAvail, // static IPDF_DataAvail* IPDF_DataAvail::Create(IFX_FileAvail* pFileAvail, IFX_FileRead* pFileRead) { - return new CPDF_DataAvail(pFileAvail, pFileRead); + return new CPDF_DataAvail(pFileAvail, pFileRead, TRUE); } // static int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0; CPDF_DataAvail::CPDF_DataAvail(IFX_FileAvail* pFileAvail, - IFX_FileRead* pFileRead) + IFX_FileRead* pFileRead, + FX_BOOL bSupportHintTable) : IPDF_DataAvail(pFileAvail, pFileRead) { m_Pos = 0; m_dwFileLen = 0; @@ -2938,6 +2955,7 @@ CPDF_DataAvail::CPDF_DataAvail(IFX_FileAvail* pFileAvail, m_bTotalLoadPageTree = FALSE; m_bCurPageDictLoadOK = FALSE; m_bLinearedDataOK = FALSE; + m_bSupportHintTable = bSupportHintTable; } CPDF_DataAvail::~CPDF_DataAvail() { if (m_pLinearized) { @@ -2949,9 +2967,9 @@ CPDF_DataAvail::~CPDF_DataAvail() { if (m_pTrailer) { m_pTrailer->Release(); } - int32_t i = 0; - int32_t iSize = m_arrayAcroforms.GetSize(); - for (i = 0; i < iSize; ++i) { + + int iSize = m_arrayAcroforms.GetSize(); + for (int i = 0; i < iSize; ++i) { static_cast<CPDF_Object*>(m_arrayAcroforms.GetAt(i))->Release(); } } @@ -3033,28 +3051,11 @@ FX_BOOL CPDF_DataAvail::IsObjectsAvail(CFX_PtrArray& obj_array, CPDF_Reference* pRef = pObj->AsReference(); FX_DWORD dwNum = pRef->GetRefObjNum(); FX_FILESIZE offset; - FX_DWORD original_size = GetObjectSize(dwNum, offset); - pdfium::base::CheckedNumeric<FX_DWORD> size = original_size; - if (size.ValueOrDefault(0) == 0 || offset < 0 || - offset >= m_dwFileLen) { - break; - } - - size += offset; - size += 512; - if (!size.IsValid()) { - break; - } - if (size.ValueOrDie() > m_dwFileLen) { - size = m_dwFileLen - offset; - } else { - size = original_size + 512; - } - if (!size.IsValid()) { + FX_DWORD size = GetObjectSize(dwNum, offset); + if (size == 0 || offset < 0 || offset >= m_dwFileLen) { break; } - if (!m_pFileAvail->IsDataAvail(offset, size.ValueOrDie())) { - pHints->AddSegment(offset, size.ValueOrDie()); + if (!IsDataAvail(offset, size, pHints)) { ret_array.Add(pObj); count++; } else if (!m_objnum_array.Find(dwNum)) { @@ -3086,19 +3087,19 @@ FX_BOOL CPDF_DataAvail::IsObjectsAvail(CFX_PtrArray& obj_array, obj_array.Append(new_obj_array); return IsObjectsAvail(obj_array, FALSE, pHints, ret_array); } -FX_BOOL CPDF_DataAvail::IsDocAvail(IFX_DownloadHints* pHints) { +int CPDF_DataAvail::IsDocAvail(IFX_DownloadHints* pHints) { if (!m_dwFileLen && m_pFileRead) { m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize(); if (!m_dwFileLen) { - return TRUE; + return PDF_DATA_ERROR; } } while (!m_bDocAvail) { if (!CheckDocStatus(pHints)) { - return FALSE; + return PDF_DATA_NOTAVAIL; } } - return TRUE; + return PDF_DATA_AVAIL; } FX_BOOL CPDF_DataAvail::CheckAcroFormSubObject(IFX_DownloadHints* pHints) { if (!m_objs_array.GetSize()) { @@ -3151,6 +3152,8 @@ FX_BOOL CPDF_DataAvail::CheckDocStatus(IFX_DownloadHints* pHints) { case PDF_DATAAVAIL_FIRSTPAGE: case PDF_DATAAVAIL_FIRSTPAGE_PREPARE: return CheckFirstPage(pHints); + case PDF_DATAAVAIL_HINTTABLE: + return CheckHintTables(pHints); case PDF_DATAAVAIL_END: return CheckEnd(pHints); case PDF_DATAAVAIL_CROSSREF: @@ -3235,53 +3238,24 @@ FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints) { CPDF_Object* CPDF_DataAvail::GetObject(FX_DWORD objnum, IFX_DownloadHints* pHints, FX_BOOL* pExistInFile) { - CPDF_Object* pRet = NULL; - FX_DWORD original_size = 0; + CPDF_Object* pRet = nullptr; + FX_DWORD size = 0; FX_FILESIZE offset = 0; - CPDF_Parser* pParser = NULL; - - if (pExistInFile) { + CPDF_Parser* pParser = nullptr; + if (pExistInFile) *pExistInFile = TRUE; - } if (m_pDocument == NULL) { - original_size = (FX_DWORD)m_parser.GetObjectSize(objnum); + size = (FX_DWORD)m_parser.GetObjectSize(objnum); offset = m_parser.GetObjectOffset(objnum); pParser = &m_parser; } else { - original_size = GetObjectSize(objnum, offset); + size = GetObjectSize(objnum, offset); pParser = (CPDF_Parser*)(m_pDocument->GetParser()); } - - pdfium::base::CheckedNumeric<FX_DWORD> size = original_size; - if (size.ValueOrDefault(0) == 0 || offset < 0 || offset >= m_dwFileLen) { - if (pExistInFile) - *pExistInFile = FALSE; - - return NULL; - } - - size += offset; - size += 512; - if (!size.IsValid()) { - return NULL; - } - - if (size.ValueOrDie() > m_dwFileLen) { - size = m_dwFileLen - offset; - } else { - size = original_size + 512; - } - - if (!size.IsValid()) { - return NULL; - } - - if (!m_pFileAvail->IsDataAvail(offset, size.ValueOrDie())) { - pHints->AddSegment(offset, size.ValueOrDie()); - return NULL; + if (!IsDataAvail(offset, size, pHints)) { + return nullptr; } - if (pParser) { pRet = pParser->ParseIndirectObject(NULL, objnum, NULL); } @@ -3576,15 +3550,82 @@ FX_BOOL CPDF_DataAvail::CheckFirstPage(IFX_DownloadHints* pHints) { } else { m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; } - if (!bNeedDownLoad && m_docStatus == PDF_DATAAVAIL_FIRSTPAGE_PREPARE) { + if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) { + m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; + return FALSE; + } + m_docStatus = + m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE; + return TRUE; +} +FX_BOOL CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, + FX_DWORD size, + IFX_DownloadHints* pHints) { + if (offset > m_dwFileLen) + return TRUE; + FX_SAFE_DWORD safeSize = pdfium::base::checked_cast<FX_DWORD>(offset); + safeSize += size; + safeSize += 512; + if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen) + size = m_dwFileLen - offset; + else + size += 512; + if (!m_pFileAvail->IsDataAvail(offset, size)) { + pHints->AddSegment(offset, size); + return FALSE; + } + return TRUE; +} +FX_BOOL CPDF_DataAvail::CheckHintTables(IFX_DownloadHints* pHints) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + if (!pDict) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + if (!pDict->KeyExist(FX_BSTRC("H")) || !pDict->KeyExist(FX_BSTRC("O")) || + !pDict->KeyExist(FX_BSTRC("N"))) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + int nPageCount = pDict->GetElementValue(FX_BSTRC("N"))->GetInteger(); + if (nPageCount <= 1) { m_docStatus = PDF_DATAAVAIL_DONE; return TRUE; } - m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; - return FALSE; + CPDF_Array* pHintStreamRange = pDict->GetArray(FX_BSTRC("H")); + FX_FILESIZE szHSStart = + pHintStreamRange->GetElementValue(0) + ? pHintStreamRange->GetElementValue(0)->GetInteger() + : 0; + FX_FILESIZE szHSLength = + pHintStreamRange->GetElementValue(1) + ? pHintStreamRange->GetElementValue(1)->GetInteger() + : 0; + if (szHSStart < 0 || szHSLength <= 0) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + if (!IsDataAvail(szHSStart, szHSLength, pHints)) { + return FALSE; + } + m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset); + nonstd::unique_ptr<CPDF_HintTables> pHintTables( + new CPDF_HintTables(this, pDict)); + CPDF_Stream* pHintStream = (CPDF_Stream*)ParseIndirectObjectAt(szHSStart, 0); + FX_BOOL bLoaded = FALSE; + if (pHintTables && pHintStream && pHintStream->GetType() == PDFOBJ_STREAM) { + bLoaded = pHintTables->LoadHintStream(pHintStream); + } + if (!bLoaded) { + m_pHintTables.reset(pHintTables.release()); + } + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; } -CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt(FX_FILESIZE pos, - FX_DWORD objnum) { +CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt( + FX_FILESIZE pos, + FX_DWORD objnum, + CPDF_IndirectObjects* pObjList) { FX_FILESIZE SavedPos = m_syntaxParser.SavePos(); m_syntaxParser.RestorePos(pos); FX_BOOL bIsNumber; @@ -3605,26 +3646,27 @@ CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt(FX_FILESIZE pos, m_syntaxParser.RestorePos(SavedPos); return NULL; } - CPDF_Object* pObj = m_syntaxParser.GetObject(NULL, objnum, gennum, 0); + CPDF_Object* pObj = + m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, 0); m_syntaxParser.RestorePos(SavedPos); return pObj; } -int32_t CPDF_DataAvail::IsLinearizedPDF() { +int CPDF_DataAvail::IsLinearizedPDF() { FX_DWORD req_size = 1024; if (!m_pFileAvail->IsDataAvail(0, req_size)) { - return PDF_UNKNOW_LINEARIZED; + return PDF_LINEARIZATION_UNKNOWN; } if (!m_pFileRead) { return PDF_NOT_LINEARIZED; } FX_FILESIZE dwSize = m_pFileRead->GetSize(); if (dwSize < (FX_FILESIZE)req_size) { - return PDF_UNKNOW_LINEARIZED; + return PDF_LINEARIZATION_UNKNOWN; } uint8_t buffer[1024]; m_pFileRead->ReadBlock(buffer, 0, req_size); if (IsLinearizedFile(buffer, req_size)) { - return PDF_IS_LINEARIZED; + return PDF_LINEARIZED; } return PDF_NOT_LINEARIZED; } @@ -4241,36 +4283,35 @@ FX_BOOL CPDF_DataAvail::LoadPages(IFX_DownloadHints* pHints) { m_pDocument->LoadPages(); return FALSE; } -FX_BOOL CPDF_DataAvail::CheckLinearizedData(IFX_DownloadHints* pHints) { +int CPDF_DataAvail::CheckLinearizedData(IFX_DownloadHints* pHints) { if (m_bLinearedDataOK) { - return TRUE; + return PDF_DATA_AVAIL; } if (!m_bMainXRefLoadTried) { FX_SAFE_DWORD data_size = m_dwFileLen; data_size -= m_dwLastXRefOffset; if (!data_size.IsValid()) { - return FALSE; + return PDF_DATA_ERROR; } if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, data_size.ValueOrDie())) { pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie()); - return FALSE; + return PDF_DATA_NOTAVAIL; } - FX_DWORD dwRet = - ((CPDF_Parser*)m_pDocument->GetParser())->LoadLinearizedMainXRefTable(); + FX_DWORD dwRet = (m_pDocument->GetParser())->LoadLinearizedMainXRefTable(); m_bMainXRefLoadTried = TRUE; if (dwRet != PDFPARSE_ERROR_SUCCESS) { - return FALSE; + return PDF_DATA_ERROR; } if (!PreparePageItem()) { - return FALSE; + return PDF_DATA_NOTAVAIL; } m_bMainXRefLoadedOK = TRUE; m_bLinearedDataOK = TRUE; } - return m_bLinearedDataOK; + return m_bLinearedDataOK ? PDF_DATA_AVAIL : PDF_DATA_NOTAVAIL; } FX_BOOL CPDF_DataAvail::CheckPageAnnots(int32_t iPage, IFX_DownloadHints* pHints) { @@ -4336,9 +4377,9 @@ FX_BOOL CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) { } return HaveResourceAncestor(pParentDict); } -FX_BOOL CPDF_DataAvail::IsPageAvail(int32_t iPage, IFX_DownloadHints* pHints) { +int CPDF_DataAvail::IsPageAvail(int32_t iPage, IFX_DownloadHints* pHints) { if (!m_pDocument) { - return FALSE; + return PDF_DATA_ERROR; } if (IsFirstCheck(iPage)) { m_bCurPageDictLoadOK = FALSE; @@ -4349,44 +4390,52 @@ FX_BOOL CPDF_DataAvail::IsPageAvail(int32_t iPage, IFX_DownloadHints* pHints) { m_objnum_array.RemoveAll(); } if (m_pagesLoadState.find(iPage) != m_pagesLoadState.end()) { - return TRUE; + return PDF_DATA_AVAIL; } if (m_bLinearized) { if ((FX_DWORD)iPage == m_dwFirstPageNo) { m_pagesLoadState.insert(iPage); - return TRUE; + return PDF_DATA_AVAIL; } - if (!CheckLinearizedData(pHints)) { - return FALSE; + int32_t nResult = CheckLinearizedData(pHints); + if (nResult != PDF_DATA_AVAIL) { + return nResult; + } + if (m_pHintTables) { + nResult = m_pHintTables->CheckPage(iPage, pHints); + if (nResult != PDF_DATA_AVAIL) + return nResult; + m_pagesLoadState.insert(iPage); + return PDF_DATA_AVAIL; } if (m_bMainXRefLoadedOK) { if (m_bTotalLoadPageTree) { if (!LoadPages(pHints)) { - return FALSE; + return PDF_DATA_NOTAVAIL; } } else { if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) { - return FALSE; + return PDF_DATA_NOTAVAIL; } } } else { if (!LoadAllFile(pHints)) { - return FALSE; + return PDF_DATA_NOTAVAIL; } ((CPDF_Parser*)m_pDocument->GetParser())->RebuildCrossRef(); ResetFirstCheck(iPage); - return TRUE; + return PDF_DATA_AVAIL; } } else { if (!m_bTotalLoadPageTree) { if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) { - return FALSE; + return PDF_DATA_NOTAVAIL; } } } if (m_bHaveAcroForm && !m_bAcroFormLoad) { if (!CheckAcroFormSubObject(pHints)) { - return FALSE; + return PDF_DATA_NOTAVAIL; } m_bAcroFormLoad = TRUE; } @@ -4397,7 +4446,7 @@ FX_BOOL CPDF_DataAvail::IsPageAvail(int32_t iPage, IFX_DownloadHints* pHints) { m_pPageDict = m_pDocument->GetPage(iPage); if (!m_pPageDict) { ResetFirstCheck(iPage); - return TRUE; + return PDF_DATA_AVAIL; } CFX_PtrArray obj_array; obj_array.Add(m_pPageDict); @@ -4417,14 +4466,14 @@ FX_BOOL CPDF_DataAvail::IsPageAvail(int32_t iPage, IFX_DownloadHints* pHints) { m_bPageLoadedOK = TRUE; } else { m_objs_array.Append(new_objs_array); - return bRet; + return PDF_DATA_NOTAVAIL; } } } if (m_bPageLoadedOK) { if (!m_bAnnotsLoad) { if (!CheckPageAnnots(iPage, pHints)) { - return FALSE; + return PDF_DATA_NOTAVAIL; } m_bAnnotsLoad = TRUE; } @@ -4440,7 +4489,7 @@ FX_BOOL CPDF_DataAvail::IsPageAvail(int32_t iPage, IFX_DownloadHints* pHints) { if (m_bNeedDownLoadResource) { FX_BOOL bRet = CheckResources(pHints); if (!bRet) { - return FALSE; + return PDF_DATA_NOTAVAIL; } m_bNeedDownLoadResource = FALSE; } @@ -4449,7 +4498,7 @@ FX_BOOL CPDF_DataAvail::IsPageAvail(int32_t iPage, IFX_DownloadHints* pHints) { m_bCurPageDictLoadOK = FALSE; ResetFirstCheck(iPage); m_pagesLoadState.insert(iPage); - return TRUE; + return PDF_DATA_AVAIL; } FX_BOOL CPDF_DataAvail::CheckResources(IFX_DownloadHints* pHints) { if (!m_objs_array.GetSize()) { @@ -4479,21 +4528,57 @@ void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, *pSize = (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset); } } -int32_t CPDF_DataAvail::IsFormAvail(IFX_DownloadHints* pHints) { +int CPDF_DataAvail::GetPageCount() const { + if (m_pLinearized) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + CPDF_Object* pObj = pDict ? pDict->GetElementValue(FX_BSTRC("N")) : nullptr; + return pObj ? pObj->GetInteger() : 0; + } + return m_pDocument ? m_pDocument->GetPageCount() : 0; +} +CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { + if (!m_pDocument || index < 0 || index >= this->GetPageCount()) { + return nullptr; + } + if (m_pLinearized) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + CPDF_Object* pObj = pDict ? pDict->GetElementValue(FX_BSTRC("P")) : nullptr; + int pageNum = pObj ? pObj->GetInteger() : 0; + if (m_pHintTables && index != pageNum) { + FX_FILESIZE szPageStartPos = 0; + FX_FILESIZE szPageLength = 0; + FX_DWORD dwObjNum = 0; + FX_BOOL bPagePosGot = m_pHintTables->GetPagePos(index, szPageStartPos, + szPageLength, dwObjNum); + if (!bPagePosGot) { + return nullptr; + } + m_syntaxParser.InitParser(m_pFileRead, (FX_DWORD)szPageStartPos); + CPDF_Object* pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument); + if (!pPageDict) { + return nullptr; + } + m_pDocument->InsertIndirectObject(dwObjNum, pPageDict); + return pPageDict->GetDict(); + } + } + return m_pDocument->GetPage(index); +} +int CPDF_DataAvail::IsFormAvail(IFX_DownloadHints* pHints) { if (!m_pDocument) { - return PDFFORM_AVAIL; + return PDF_FORM_AVAIL; } if (!m_bLinearizedFormParamLoad) { CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); if (!pRoot) { - return PDFFORM_AVAIL; + return PDF_FORM_AVAIL; } CPDF_Object* pAcroForm = pRoot->GetElement(FX_BSTRC("AcroForm")); if (!pAcroForm) { - return PDFFORM_NOTEXIST; + return PDF_FORM_NOTEXIST; } if (!CheckLinearizedData(pHints)) { - return PDFFORM_NOTAVAIL; + return PDF_FORM_NOTAVAIL; } if (!m_objs_array.GetSize()) { m_objs_array.Add(pAcroForm->GetDict()); @@ -4505,9 +4590,9 @@ int32_t CPDF_DataAvail::IsFormAvail(IFX_DownloadHints* pHints) { m_objs_array.RemoveAll(); if (!bRet) { m_objs_array.Append(new_objs_array); - return PDFFORM_NOTAVAIL; + return PDF_FORM_NOTAVAIL; } - return PDFFORM_AVAIL; + return PDF_FORM_AVAIL; } void CPDF_SortObjNumArray::AddObjNum(FX_DWORD dwObjNum) { int32_t iNext = 0; @@ -4546,3 +4631,332 @@ CPDF_PageNode::~CPDF_PageNode() { } m_childNode.RemoveAll(); } +CPDF_HintTables::~CPDF_HintTables() { + m_dwDeltaNObjsArray.RemoveAll(); + m_dwNSharedObjsArray.RemoveAll(); + m_dwSharedObjNumArray.RemoveAll(); + m_dwIdentifierArray.RemoveAll(); + m_szPageOffsetArray.RemoveAll(); + m_szSharedObjOffsetArray.RemoveAll(); +} +FX_DWORD CPDF_HintTables::GetItemLength(int index, + const CFX_FileSizeArray& szArray) { + if (index < 0 || szArray.GetSize() < 2 || index > szArray.GetSize() - 2 || + szArray[index] > szArray[index + 1]) + return 0; + return szArray[index + 1] - szArray[index]; +} +FX_BOOL CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) { + if (!hStream) + return FALSE; + int nStreamOffset = ReadPrimaryHintStreamOffset(); + int nStreamLen = ReadPrimaryHintStreamLength(); + if (nStreamOffset < 0 || nStreamLen < 1) + return FALSE; + // Item 1: The least number of objects in a page. + FX_DWORD dwObjLeastNum = hStream->GetBits(32); + // Item 2: The location of the first page's page object. + FX_DWORD dwFirstObjLoc = hStream->GetBits(32); + if (dwFirstObjLoc > nStreamOffset) { + FX_SAFE_DWORD safeLoc = pdfium::base::checked_cast<FX_DWORD>(nStreamLen); + safeLoc += dwFirstObjLoc; + if (!safeLoc.IsValid()) + return FALSE; + m_szFirstPageObjOffset = + pdfium::base::checked_cast<FX_FILESIZE>(safeLoc.ValueOrDie()); + } else { + m_szFirstPageObjOffset = + pdfium::base::checked_cast<FX_FILESIZE>(dwFirstObjLoc); + } + // Item 3: The number of bits needed to represent the difference + // between the greatest and least number of objects in a page. + FX_DWORD dwDeltaObjectsBits = hStream->GetBits(16); + // Item 4: The least length of a page in bytes. + FX_DWORD dwPageLeastLen = hStream->GetBits(32); + // Item 5: The number of bits needed to represent the difference + // between the greatest and least length of a page, in bytes. + FX_DWORD dwDeltaPageLenBits = hStream->GetBits(16); + // Skip Item 6, 7, 8, 9 total 96 bits. + hStream->SkipBits(96); + // Item 10: The number of bits needed to represent the greatest + // number of shared object references. + FX_DWORD dwSharedObjBits = hStream->GetBits(16); + // Item 11: The number of bits needed to represent the numerically + // greatest shared object identifier used by the pages. + FX_DWORD dwSharedIdBits = hStream->GetBits(16); + // Item 12: The number of bits needed to represent the numerator of + // the fractional position for each shared object reference. For each + // shared object referenced from a page, there is an indication of + // where in the page's content stream the object is first referenced. + FX_DWORD dwSharedNumeratorBits = hStream->GetBits(16); + // Item 13: Skip Item 13 which has 16 bits. + FX_DWORD dwSharedDenominator = hStream->GetBits(16); + CPDF_Object* pPageNum = m_pLinearizedDict->GetElementValue(FX_BSTRC("N")); + int nPages = pPageNum ? pPageNum->GetInteger() : 0; + if (nPages < 1) + return FALSE; + for (int i = 0; i < nPages; ++i) { + FX_SAFE_DWORD safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits); + safeDeltaObj += dwObjLeastNum; + if (!safeDeltaObj.IsValid()) + return FALSE; + m_dwDeltaNObjsArray.Add(safeDeltaObj.ValueOrDie()); + } + hStream->ByteAlign(); + CFX_DWordArray dwPageLenArray; + for (int i = 0; i < nPages; ++i) { + FX_SAFE_DWORD safePageLen = hStream->GetBits(dwDeltaPageLenBits); + safePageLen += dwPageLeastLen; + if (!safePageLen.IsValid()) + return FALSE; + dwPageLenArray.Add(safePageLen.ValueOrDie()); + } + CPDF_Object* pOffsetE = m_pLinearizedDict->GetElementValue(FX_BSTRC("E")); + int nOffsetE = pOffsetE ? pOffsetE->GetInteger() : -1; + if (nOffsetE < 0) + return FALSE; + CPDF_Object* pFirstPageNum = + m_pLinearizedDict->GetElementValue(FX_BSTRC("P")); + int nFirstPageNum = pFirstPageNum ? pFirstPageNum->GetInteger() : 0; + for (int i = 0; i < nPages; ++i) { + if (i == nFirstPageNum) { + m_szPageOffsetArray.Add(m_szFirstPageObjOffset); + } else if (i == nFirstPageNum + 1) { + if (i == 1) { + m_szPageOffsetArray.Add(nOffsetE); + } else { + m_szPageOffsetArray.Add(m_szPageOffsetArray[i - 2] + + dwPageLenArray[i - 2]); + } + } else { + if (i == 0) { + m_szPageOffsetArray.Add(nOffsetE); + } else { + m_szPageOffsetArray.Add(m_szPageOffsetArray[i - 1] + + dwPageLenArray[i - 1]); + } + } + } + if (nPages > 0) { + m_szPageOffsetArray.Add(m_szPageOffsetArray[nPages - 1] + + dwPageLenArray[nPages - 1]); + } + hStream->ByteAlign(); + // number of shared objects + for (int i = 0; i < nPages; i++) { + m_dwNSharedObjsArray.Add(hStream->GetBits(dwSharedObjBits)); + } + hStream->ByteAlign(); + // array of identifier, sizes = nshared_objects + for (int i = 0; i < nPages; i++) { + for (int j = 0; j < m_dwNSharedObjsArray[i]; j++) { + m_dwIdentifierArray.Add(hStream->GetBits(dwSharedIdBits)); + } + } + hStream->ByteAlign(); + for (int i = 0; i < nPages; i++) { + FX_SAFE_DWORD safeSize = m_dwNSharedObjsArray[i]; + safeSize *= dwSharedNumeratorBits; + if (!safeSize.IsValid()) + return FALSE; + hStream->SkipBits(safeSize.ValueOrDie()); + } + hStream->ByteAlign(); + FX_SAFE_DWORD safeTotalPageLen = pdfium::base::checked_cast<FX_DWORD>(nPages); + safeTotalPageLen *= dwDeltaPageLenBits; + if (!safeTotalPageLen.IsValid()) + return FALSE; + hStream->SkipBits(safeTotalPageLen.ValueOrDie()); + hStream->ByteAlign(); + return TRUE; +} +FX_BOOL CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream) { + if (!hStream) + return FALSE; + int nStreamOffset = ReadPrimaryHintStreamOffset(); + int nStreamLen = ReadPrimaryHintStreamLength(); + if (nStreamOffset < 0 || nStreamLen < 1) + return FALSE; + // Item 1: The object number of the first object in the shared objects + // section. + FX_DWORD dwFirstSharedObjNum = hStream->GetBits(32); + // Item 2: The location of the first object in the shared objects section. + FX_DWORD dwFirstSharedObjLoc = hStream->GetBits(32); + if (dwFirstSharedObjLoc > nStreamOffset) + dwFirstSharedObjLoc += nStreamLen; + // Item 3: The number of shared object entries for the first page. + m_nFirstPageSharedObjs = hStream->GetBits(32); + // Item 4: The number of shared object entries for the shared objects + // section, including the number of shared object entries for the first page. + FX_DWORD dwSharedObjTotal = hStream->GetBits(32); + // Item 5: The number of bits needed to represent the greatest number of + // objects in a shared object group. Skipped. + hStream->SkipBits(16); + // Item 6: The least length of a shared object group in bytes. + FX_DWORD dwGroupLeastLen = hStream->GetBits(32); + // Item 7: The number of bits needed to represent the difference between the + // greatest and least length of a shared object group, in bytes. + FX_DWORD dwDeltaGroupLen = hStream->GetBits(16); + CPDF_Object* pFirstPageObj = + m_pLinearizedDict->GetElementValue(FX_BSTRC("O")); + int nFirstPageObjNum = pFirstPageObj ? pFirstPageObj->GetInteger() : -1; + if (nFirstPageObjNum < 0) + return FALSE; + FX_DWORD dwPrevObjLen = 0; + FX_DWORD dwCurObjLen = 0; + for (int i = 0; i < dwSharedObjTotal; ++i) { + dwPrevObjLen = dwCurObjLen; + FX_SAFE_DWORD safeObjLen = hStream->GetBits(dwDeltaGroupLen); + safeObjLen += dwGroupLeastLen; + if (!safeObjLen.IsValid()) + return FALSE; + dwCurObjLen = safeObjLen.ValueOrDie(); + if (i < m_nFirstPageSharedObjs) { + m_dwSharedObjNumArray.Add(nFirstPageObjNum + i); + if (i == 0) + m_szSharedObjOffsetArray.Add(m_szFirstPageObjOffset); + } else { + FX_SAFE_DWORD safeObjNum = dwFirstSharedObjNum; + safeObjNum += i - m_nFirstPageSharedObjs; + if (!safeObjNum.IsValid()) + return FALSE; + m_dwSharedObjNumArray.Add(safeObjNum.ValueOrDie()); + if (i == m_nFirstPageSharedObjs) + m_szSharedObjOffsetArray.Add( + pdfium::base::checked_cast<int32_t>(dwFirstSharedObjLoc)); + } + if (i != 0 && i != m_nFirstPageSharedObjs) { + FX_SAFE_INT32 safeLoc = pdfium::base::checked_cast<int32_t>(dwPrevObjLen); + safeLoc += m_szSharedObjOffsetArray[i - 1]; + if (!safeLoc.IsValid()) + return FALSE; + m_szSharedObjOffsetArray.Add(safeLoc.ValueOrDie()); + } + } + if (dwSharedObjTotal > 0) { + FX_SAFE_INT32 safeLoc = pdfium::base::checked_cast<int32_t>(dwCurObjLen); + safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1]; + if (!safeLoc.IsValid()) + return FALSE; + m_szSharedObjOffsetArray.Add(safeLoc.ValueOrDie()); + } + hStream->ByteAlign(); + hStream->SkipBits(dwSharedObjTotal); + hStream->ByteAlign(); + return TRUE; +} +FX_BOOL CPDF_HintTables::GetPagePos(int index, + FX_FILESIZE& szPageStartPos, + FX_FILESIZE& szPageLength, + FX_DWORD& dwObjNum) { + if (!m_pLinearizedDict) + return FALSE; + szPageStartPos = m_szPageOffsetArray[index]; + szPageLength = GetItemLength(index, m_szPageOffsetArray); + CPDF_Object* pFirstPageNum = + m_pLinearizedDict->GetElementValue(FX_BSTRC("P")); + int nFirstPageNum = pFirstPageNum ? pFirstPageNum->GetInteger() : 0; + CPDF_Object* pFirstPageObjNum = + m_pLinearizedDict->GetElementValue(FX_BSTRC("O")); + if (!pFirstPageObjNum) + return FALSE; + int nFirstPageObjNum = pFirstPageObjNum->GetInteger(); + if (index == nFirstPageNum) { + dwObjNum = nFirstPageObjNum; + return TRUE; + } + // The object number of remaining pages starts from 1. + dwObjNum = 1; + for (int i = 0; i < index; ++i) { + if (i == nFirstPageNum) + continue; + dwObjNum += m_dwDeltaNObjsArray[i]; + } + return TRUE; +} +int32_t CPDF_HintTables::CheckPage(int index, IFX_DownloadHints* pHints) { + if (!m_pLinearizedDict || !pHints) + return PDF_DATA_ERROR; + CPDF_Object* pFirstAvailPage = + m_pLinearizedDict->GetElementValue(FX_BSTRC("P")); + int nFirstAvailPage = pFirstAvailPage ? pFirstAvailPage->GetInteger() : 0; + if (index == nFirstAvailPage) + return PDF_DATA_AVAIL; + FX_DWORD dwLength = GetItemLength(index, m_szPageOffsetArray); + if (!dwLength || + !m_pDataAvail->IsDataAvail(m_szPageOffsetArray[index], dwLength, + pHints)) { + return PDF_DATA_NOTAVAIL; + } + // Download data of shared objects in the page. + FX_DWORD offset = 0; + for (int i = 0; i < index; ++i) { + offset += m_dwNSharedObjsArray[i]; + } + CPDF_Object* pFirstPageObj = + m_pLinearizedDict->GetElementValue(FX_BSTRC("O")); + int nFirstPageObjNum = pFirstPageObj ? pFirstPageObj->GetInteger() : -1; + if (nFirstPageObjNum < 0) + return FALSE; + FX_DWORD dwIndex = 0; + FX_DWORD dwObjNum = 0; + for (int j = 0; j < m_dwNSharedObjsArray[index]; ++j) { + dwIndex = m_dwIdentifierArray[offset + j]; + dwObjNum = m_dwSharedObjNumArray[dwIndex]; + if (dwObjNum >= nFirstPageObjNum && + dwObjNum < nFirstPageObjNum + m_nFirstPageSharedObjs) { + continue; + } + dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray); + if (!dwLength || + !m_pDataAvail->IsDataAvail(m_szSharedObjOffsetArray[dwIndex], dwLength, + pHints)) { + return PDF_DATA_NOTAVAIL; + } + } + return PDF_DATA_AVAIL; +} +FX_BOOL CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) { + if (!pHintStream || !m_pLinearizedDict) + return FALSE; + CPDF_Dictionary* pDict = pHintStream->GetDict(); + CPDF_Object* pOffset = pDict ? pDict->GetElement(FX_BSTRC("S")) : nullptr; + if (!pOffset || pOffset->GetType() != PDFOBJ_NUMBER) + return FALSE; + CPDF_StreamAcc acc; + acc.LoadAllData(pHintStream); + FX_DWORD size = acc.GetSize(); + // The header section of page offset hint table is 36 bytes. + // The header section of shared object hint table is 24 bytes. + // Hint table has at least 60 bytes. + const FX_DWORD MIN_STREAM_LEN = 60; + if (size < MIN_STREAM_LEN || size < pOffset->GetInteger() || + !pOffset->GetInteger()) { + return FALSE; + } + CFX_BitStream bs; + bs.Init(acc.GetData(), size); + return ReadPageHintTable(&bs) && ReadSharedObjHintTable(&bs); +} +int CPDF_HintTables::ReadPrimaryHintStreamOffset() const { + if (!m_pLinearizedDict) + return -1; + CPDF_Array* pRange = m_pLinearizedDict->GetArray(FX_BSTRC("H")); + if (!pRange) + return -1; + CPDF_Object* pStreamOffset = pRange->GetElementValue(0); + if (!pStreamOffset) + return -1; + return pStreamOffset->GetInteger(); +} +int CPDF_HintTables::ReadPrimaryHintStreamLength() const { + if (!m_pLinearizedDict) + return -1; + CPDF_Array* pRange = m_pLinearizedDict->GetArray(FX_BSTRC("H")); + if (!pRange) + return -1; + CPDF_Object* pStreamLen = pRange->GetElementValue(1); + if (!pStreamLen) + return -1; + return pStreamLen->GetInteger(); +} diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_embeddertest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_embeddertest.cpp index ed2863bcf3..d5cf3843e8 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_embeddertest.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_embeddertest.cpp @@ -8,7 +8,8 @@ class FPDFParserEmbeddertest : public EmbedderTest {}; TEST_F(FPDFParserEmbeddertest, LoadError_454695) { - // Test trailer dictionary with $$ze instead of Size. + // Test a dictionary with hex string instead of correct content. + // Verify that the defective pdf shouldn't be opened correctly. EXPECT_FALSE(OpenDocument("testing/resources/bug_454695.pdf")); } @@ -28,3 +29,8 @@ TEST_F(FPDFParserEmbeddertest, Bug_544880) { // rather than the actual count (0). (void)GetPageCount(); } + +TEST_F(FPDFParserEmbeddertest, Feature_Linearized_Loading) { + EXPECT_TRUE( + OpenDocument("testing/resources/feature_linearized_loading.pdf", true)); +} diff --git a/core/src/fpdfapi/fpdf_parser/parser_int.h b/core/src/fpdfapi/fpdf_parser/parser_int.h new file mode 100644 index 0000000000..a9d5874f75 --- /dev/null +++ b/core/src/fpdfapi/fpdf_parser/parser_int.h @@ -0,0 +1,49 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_SRC_FPDFAPI_FPDF_PARSER_PARSER_INT_H_ +#define CORE_SRC_FPDFAPI_FPDF_PARSER_PARSER_INT_H_ + +class CPDF_DataAvail; + +class CPDF_HintTables { + public: + CPDF_HintTables(CPDF_DataAvail* pDataAvail, CPDF_Dictionary* pLinearized) + : m_pLinearizedDict(pLinearized), + m_pHintStream(nullptr), + m_pDataAvail(pDataAvail), + m_nFirstPageSharedObjs(0), + m_szFirstPageObjOffset(0) {} + ~CPDF_HintTables(); + FX_BOOL GetPagePos(int index, + FX_FILESIZE& szPageStartPos, + FX_FILESIZE& szPageLength, + FX_DWORD& dwObjNum); + FX_BOOL CheckPage(int index, IFX_DownloadHints* pHints); + FX_BOOL LoadHintStream(CPDF_Stream* pHintStream); + + protected: + FX_BOOL ReadPageHintTable(CFX_BitStream* hStream); + FX_BOOL ReadSharedObjHintTable(CFX_BitStream* hStream); + FX_DWORD GetItemLength(int index, const CFX_FileSizeArray& szArray); + + private: + int ReadPrimaryHintStreamOffset() const; + int ReadPrimaryHintStreamLength() const; + + CPDF_Dictionary* m_pLinearizedDict; + CPDF_Stream* m_pHintStream; + CPDF_DataAvail* m_pDataAvail; + FX_DWORD m_nFirstPageSharedObjs; + FX_FILESIZE m_szFirstPageObjOffset; + CFX_DWordArray m_dwDeltaNObjsArray; + CFX_DWordArray m_dwNSharedObjsArray; + CFX_DWordArray m_dwSharedObjNumArray; + CFX_DWordArray m_dwIdentifierArray; + CFX_FileSizeArray m_szPageOffsetArray; + CFX_FileSizeArray m_szSharedObjOffsetArray; +}; +#endif // CORE_SRC_FPDFAPI_FPDF_PARSER_PARSER_INT_H_ diff --git a/fpdfsdk/src/fpdf_dataavail.cpp b/fpdfsdk/src/fpdf_dataavail.cpp index 04e246b17e..960e497e23 100644 --- a/fpdfsdk/src/fpdf_dataavail.cpp +++ b/fpdfsdk/src/fpdf_dataavail.cpp @@ -87,10 +87,10 @@ DLLEXPORT void STDCALL FPDFAvail_Destroy(FPDF_AVAIL avail) { delete (CFPDF_DataAvail*)avail; } -DLLEXPORT int STDCALL FPDFAvail_IsDocAvail(FPDF_AVAIL avail, - FX_DOWNLOADHINTS* hints) { - if (avail == NULL || hints == NULL) - return 0; +DLLEXPORT int STDCALL +FPDFAvail_IsDocAvail(FPDF_AVAIL avail, FX_DOWNLOADHINTS* hints) { + if (!avail || !hints) + return PDF_DATA_ERROR; CFPDF_DownloadHintsWrap hints_wrap(hints); return ((CFPDF_DataAvail*)avail)->m_pDataAvail->IsDocAvail(&hints_wrap); } @@ -124,8 +124,8 @@ DLLEXPORT int STDCALL FPDFAvail_GetFirstPageNum(FPDF_DOCUMENT doc) { DLLEXPORT int STDCALL FPDFAvail_IsPageAvail(FPDF_AVAIL avail, int page_index, FX_DOWNLOADHINTS* hints) { - if (avail == NULL || hints == NULL) - return 0; + if (!avail || !hints) + return PDF_DATA_ERROR; CFPDF_DownloadHintsWrap hints_wrap(hints); return ((CFPDF_DataAvail*)avail) ->m_pDataAvail->IsPageAvail(page_index, &hints_wrap); @@ -133,14 +133,14 @@ DLLEXPORT int STDCALL FPDFAvail_IsPageAvail(FPDF_AVAIL avail, DLLEXPORT int STDCALL FPDFAvail_IsFormAvail(FPDF_AVAIL avail, FX_DOWNLOADHINTS* hints) { - if (avail == NULL || hints == NULL) - return -1; + if (!avail || !hints) + return PDF_FORM_ERROR; CFPDF_DownloadHintsWrap hints_wrap(hints); return ((CFPDF_DataAvail*)avail)->m_pDataAvail->IsFormAvail(&hints_wrap); } -DLLEXPORT FPDF_BOOL STDCALL FPDFAvail_IsLinearized(FPDF_AVAIL avail) { - if (avail == NULL) - return -1; +DLLEXPORT int STDCALL FPDFAvail_IsLinearized(FPDF_AVAIL avail) { + if (!avail) + return PDF_LINEARIZATION_UNKNOWN; return ((CFPDF_DataAvail*)avail)->m_pDataAvail->IsLinearizedPDF(); } diff --git a/pdfium.gyp b/pdfium.gyp index 67aeade7c4..0eebb713bc 100644 --- a/pdfium.gyp +++ b/pdfium.gyp @@ -297,6 +297,7 @@ 'core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp', 'core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp', 'core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp', + 'core/src/fpdfapi/fpdf_parser/parser_int.h', 'core/src/fpdfapi/fpdf_render/fpdf_render.cpp', 'core/src/fpdfapi/fpdf_render/fpdf_render_cache.cpp', 'core/src/fpdfapi/fpdf_render/fpdf_render_image.cpp', diff --git a/public/fpdf_dataavail.h b/public/fpdf_dataavail.h index c17f0311ab..3ed7d285ed 100644 --- a/public/fpdf_dataavail.h +++ b/public/fpdf_dataavail.h @@ -11,10 +11,16 @@ #include "fpdfview.h" -/** The result of the process which check linearized PDF. */ -#define FSDK_IS_LINEARIZED 1 -#define FSDK_NOT_LINEARIZED 0 -#define FSDK_UNKNOW_LINEARIZED -1 +#define PDF_LINEARIZATION_UNKNOWN -1 +#define PDF_NOT_LINEARIZED 0 +#define PDF_LINEARIZED 1 +#define PDF_DATA_ERROR -1 +#define PDF_DATA_NOTAVAIL 0 +#define PDF_DATA_AVAIL 1 +#define PDF_FORM_ERROR -1 +#define PDF_FORM_NOTAVAIL 0 +#define PDF_FORM_AVAIL 1 +#define PDF_FORM_NOTEXIST 2 #ifdef __cplusplus extern "C" { @@ -130,17 +136,19 @@ typedef struct _FX_DOWNLOADHINTS { * hints - Pointer to a download hints interface, receiving * generated hints * Return value: -* Non-zero for page is fully available, 0 for page not yet available. +* PDF_DATA_ERROR: A common error is returned. It can't tell +* whehter data are availabe or not. +* PDF_DATA_NOTAVAIL: Data are not yet available. +* PDF_DATA_AVAIL: Data are available. * Comments: -* The application should call this function whenever new data arrived, -* and process all the -* generated download hints if any, until the function returns non-zero -* value. Then the -* application can call FPDFAvail_GetDocument() to get a document -* handle. +* Applications should call this function whenever new data arrived, +* and process all the generated download hints if any, until the +* function returns PDF_DATA_ERROR or PDF_DATA_AVAIL. Then +* applications can call FPDFAvail_GetDocument() to get a document +* handle. */ -DLLEXPORT int STDCALL FPDFAvail_IsDocAvail(FPDF_AVAIL avail, - FX_DOWNLOADHINTS* hints); +DLLEXPORT int STDCALL +FPDFAvail_IsDocAvail(FPDF_AVAIL avail, FX_DOWNLOADHINTS* hints); /** * Function: FPDFAvail_GetDocument @@ -189,15 +197,16 @@ DLLEXPORT int STDCALL FPDFAvail_GetFirstPageNum(FPDF_DOCUMENT doc); * hints - Pointer to a download hints interface, receiving * generated hints * Return value: -* Non-zero for page is fully available, 0 for page not yet available. +* PDF_DATA_ERROR: A common error is returned. It can't tell +* whehter data are availabe or not. +* PDF_DATA_NOTAVAIL: Data are not yet available. +* PDF_DATA_AVAIL: Data are available. * Comments: -* This function call be called only after FPDFAvail_GetDocument if -* called. -* The application should call this function whenever new data arrived, -* and process all the -* generated download hints if any, until the function returns non-zero -* value. Then the -* application can perform page loading. +* This function can be called only after FPDFAvail_GetDocument is +* called. Applications should call this function whenever new data +* arrived and process all the generated download hints if any, until +* this function returns PDF_DATA_ERROR or PDF_DATA_AVAIL. Then +* applications can perform page loading. */ DLLEXPORT int STDCALL FPDFAvail_IsPageAvail(FPDF_AVAIL avail, int page_index, @@ -214,16 +223,14 @@ DLLEXPORT int STDCALL FPDFAvail_IsPageAvail(FPDF_AVAIL avail, * hints - Pointer to a download hints interface, receiving * generated hints * Return value: -* Non-zero for Form data is fully available, 0 for Form data not yet -* available. -* Details: -1 - error, the input parameter not correct, such as hints -* is null. -* 0 - data not available -* 1 - data available -* 2 - no form data. +* PDF_FORM_ERROR - A common eror, in general incorrect parameters, +* like 'hints' is nullptr. +* PDF_FORM_NOTAVAIL - data not available +* PDF_FORM_AVAIL - data available +* PDF_FORM_NOTEXIST - no form data * Comments: -* This function call be called only after FPDFAvail_GetDocument if -* called. +* This function can be called only after FPDFAvail_GetDocument is +* called. * The application should call this function whenever new data arrived, * and process all the * generated download hints if any, until the function returns non-zero @@ -243,18 +250,19 @@ DLLEXPORT int STDCALL FPDFAvail_IsFormAvail(FPDF_AVAIL avail, * avail - Handle to document availability provider returned by * FPDFAvail_Create * Return value: -* return TRUE means the document is linearized PDF else not. -* FSDK_IS_LINEARIZED is a linearize file. -* FSDK_NOT_LINEARIZED is not a linearize file. -* FSDK_UNKNOW_LINEARIZED don't know whether the file is a linearize -* file. +* PDF_LINEARIZED is a linearize file. +* PDF_NOT_LINEARIZED is not a linearize file. +* PDF_LINEARIZATION_UNKNOWN doesn't know whether the file is a +*linearize file. +* * Comments: -* It return TRUE/FALSE as soon as we have first 1K data. If the -* file's size less than -* 1K,we don't known whether the PDF is a linearized file. +* It return PDF_LINEARIZED or PDF_NOT_LINEARIZED as soon as +* we have first 1K data. If the file's size less than 1K, it returns +* PDF_LINEARIZATION_UNKNOWN because there is not enough information to +* tell whether a PDF file is a linearized file or not. * */ -DLLEXPORT FPDF_BOOL STDCALL FPDFAvail_IsLinearized(FPDF_AVAIL avail); +DLLEXPORT int STDCALL FPDFAvail_IsLinearized(FPDF_AVAIL avail); #ifdef __cplusplus } diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc index 840e140040..fc94fde19f 100644 --- a/samples/pdfium_test.cc +++ b/samples/pdfium_test.cc @@ -344,6 +344,70 @@ FPDF_BOOL Is_Data_Avail(FX_FILEAVAIL* pThis, size_t offset, size_t size) { void Add_Segment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) { } +FPDF_BOOL RenderPage(const std::string& name, + const FPDF_DOCUMENT& doc, + const FPDF_FORMHANDLE& form, + const int page_index, + const Options& options) { + FPDF_PAGE page = FPDF_LoadPage(doc, page_index); + if (!page) { + return FALSE; + } + FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page); + FORM_OnAfterLoadPage(page, form); + FORM_DoPageAAction(page, form, FPDFPAGE_AACTION_OPEN); + + double scale = 1.0; + if (!options.scale_factor_as_string.empty()) { + std::stringstream(options.scale_factor_as_string) >> scale; + } + int width = static_cast<int>(FPDF_GetPageWidth(page) * scale); + int height = static_cast<int>(FPDF_GetPageHeight(page) * scale); + + FPDF_BITMAP bitmap = FPDFBitmap_Create(width, height, 0); + if (!bitmap) { + fprintf(stderr, "Page was too large to be rendered.\n"); + return FALSE; + } + + FPDFBitmap_FillRect(bitmap, 0, 0, width, height, 0xFFFFFFFF); + FPDF_RenderPageBitmap(bitmap, page, 0, 0, width, height, 0, 0); + + FPDF_FFLDraw(form, bitmap, page, 0, 0, width, height, 0, 0); + int stride = FPDFBitmap_GetStride(bitmap); + const char* buffer = + reinterpret_cast<const char*>(FPDFBitmap_GetBuffer(bitmap)); + + switch (options.output_format) { +#ifdef _WIN32 + case OUTPUT_BMP: + WriteBmp(name.c_str(), page_index, buffer, stride, width, height); + break; + + case OUTPUT_EMF: + WriteEmf(page, name.c_str(), page_index); + break; +#endif + case OUTPUT_PNG: + WritePng(name.c_str(), page_index, buffer, stride, width, height); + break; + + case OUTPUT_PPM: + WritePpm(name.c_str(), page_index, buffer, stride, width, height); + break; + + default: + break; + } + + FPDFBitmap_Destroy(bitmap); + FORM_DoPageAAction(page, form, FPDFPAGE_AACTION_CLOSE); + FORM_OnBeforeClosePage(page, form); + FPDFText_ClosePage(text_page); + FPDF_ClosePage(page); + return TRUE; +} + void RenderPdf(const std::string& name, const char* pBuf, size_t len, const Options& options) { fprintf(stderr, "Rendering PDF file %s.\n", name.c_str()); @@ -377,14 +441,34 @@ void RenderPdf(const std::string& name, const char* pBuf, size_t len, hints.AddSegment = Add_Segment; FPDF_DOCUMENT doc; + int nRet = PDF_DATA_NOTAVAIL; + FPDF_BOOL bIsLinearized = FALSE; FPDF_AVAIL pdf_avail = FPDFAvail_Create(&file_avail, &file_access); - (void)FPDFAvail_IsDocAvail(pdf_avail, &hints); - - if (FPDFAvail_IsLinearized(pdf_avail)) + if (FPDFAvail_IsLinearized(pdf_avail) == PDF_LINEARIZED) { + fprintf(stderr, "Linearized path...\n"); doc = FPDFAvail_GetDocument(pdf_avail, nullptr); - else + if (doc) { + while (nRet == PDF_DATA_NOTAVAIL) { + nRet = FPDFAvail_IsDocAvail(pdf_avail, &hints); + } + if (nRet == PDF_DATA_ERROR) { + fprintf(stderr, "Unknown error in checking if doc was available.\n"); + return; + } + nRet = FPDFAvail_IsFormAvail(pdf_avail, &hints); + if (nRet == PDF_FORM_ERROR || nRet == PDF_FORM_NOTAVAIL) { + fprintf(stderr, + "Error %d was returned in checking if form was available.\n", + nRet); + return; + } + bIsLinearized = TRUE; + } + } else { + fprintf(stderr, "Non-linearized path...\n"); doc = FPDF_LoadCustomDocument(&file_access, nullptr); + } if (!doc) { unsigned long err = FPDF_GetLastError(); @@ -421,86 +505,34 @@ void RenderPdf(const std::string& name, const char* pBuf, size_t len, } (void)FPDF_GetDocPermissions(doc); - (void)FPDFAvail_IsFormAvail(pdf_avail, &hints); FPDF_FORMHANDLE form = FPDFDOC_InitFormFillEnvironment(doc, &form_callbacks); FPDF_SetFormFieldHighlightColor(form, 0, 0xFFE4DD); FPDF_SetFormFieldHighlightAlpha(form, 100); - int first_page = FPDFAvail_GetFirstPageNum(doc); - (void)FPDFAvail_IsPageAvail(pdf_avail, first_page, &hints); - - int page_count = FPDF_GetPageCount(doc); - for (int i = 0; i < page_count; ++i) { - (void)FPDFAvail_IsPageAvail(pdf_avail, i, &hints); - } - FORM_DoDocumentJSAction(form); FORM_DoDocumentOpenAction(form); + int page_count = FPDF_GetPageCount(doc); int rendered_pages = 0; int bad_pages = 0; for (int i = 0; i < page_count; ++i) { - FPDF_PAGE page = FPDF_LoadPage(doc, i); - if (!page) { - ++bad_pages; - continue; - } - FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page); - FORM_OnAfterLoadPage(page, form); - FORM_DoPageAAction(page, form, FPDFPAGE_AACTION_OPEN); - - double scale = 1.0; - if (!options.scale_factor_as_string.empty()) { - std::stringstream(options.scale_factor_as_string) >> scale; - } - int width = static_cast<int>(FPDF_GetPageWidth(page) * scale); - int height = static_cast<int>(FPDF_GetPageHeight(page) * scale); - - FPDF_BITMAP bitmap = FPDFBitmap_Create(width, height, 0); - if (!bitmap) { - fprintf(stderr, "Page was too large to be rendered.\n"); - bad_pages++; - continue; + if (bIsLinearized) { + nRet = PDF_DATA_NOTAVAIL; + while (nRet == PDF_DATA_NOTAVAIL) { + nRet = FPDFAvail_IsPageAvail(pdf_avail, i, &hints); + } + if (nRet == PDF_DATA_ERROR) { + fprintf(stderr, "Unknown error in checking if page %d is available.\n", + i); + return; + } } - - FPDFBitmap_FillRect(bitmap, 0, 0, width, height, 0xFFFFFFFF); - FPDF_RenderPageBitmap(bitmap, page, 0, 0, width, height, 0, 0); - ++rendered_pages; - - FPDF_FFLDraw(form, bitmap, page, 0, 0, width, height, 0, 0); - int stride = FPDFBitmap_GetStride(bitmap); - const char* buffer = - reinterpret_cast<const char*>(FPDFBitmap_GetBuffer(bitmap)); - - switch (options.output_format) { -#ifdef _WIN32 - case OUTPUT_BMP: - WriteBmp(name.c_str(), i, buffer, stride, width, height); - break; - - case OUTPUT_EMF: - WriteEmf(page, name.c_str(), i); - break; -#endif - case OUTPUT_PNG: - WritePng(name.c_str(), i, buffer, stride, width, height); - break; - - case OUTPUT_PPM: - WritePpm(name.c_str(), i, buffer, stride, width, height); - break; - - default: - break; + if (RenderPage(name, doc, form, i, options)) { + ++rendered_pages; + } else { + ++bad_pages; } - - FPDFBitmap_Destroy(bitmap); - - FORM_DoPageAAction(page, form, FPDFPAGE_AACTION_CLOSE); - FORM_OnBeforeClosePage(page, form); - FPDFText_ClosePage(text_page); - FPDF_ClosePage(page); } FORM_DoDocumentAAction(form, FPDFDOC_AACTION_WC); diff --git a/testing/embedder_test.cpp b/testing/embedder_test.cpp index b474955566..6151d73afd 100644 --- a/testing/embedder_test.cpp +++ b/testing/embedder_test.cpp @@ -11,6 +11,7 @@ #include <utility> #include <vector> +#include "../public/fpdf_dataavail.h" #include "../public/fpdf_text.h" #include "../public/fpdfview.h" #include "test_support.h" @@ -91,7 +92,8 @@ void EmbedderTest::TearDown() { free(file_contents_); } -bool EmbedderTest::OpenDocument(const std::string& filename) { +bool EmbedderTest::OpenDocument(const std::string& filename, + bool must_linearize) { file_contents_ = GetFileContents(filename.c_str(), &file_length_); if (!file_contents_) return false; @@ -108,18 +110,44 @@ bool EmbedderTest::OpenDocument(const std::string& filename) { hints_.AddSegment = Add_Segment; avail_ = FPDFAvail_Create(&file_avail_, &file_access_); - (void)FPDFAvail_IsDocAvail(avail_, &hints_); - if (!FPDFAvail_IsLinearized(avail_)) - document_ = FPDF_LoadCustomDocument(&file_access_, nullptr); - else + if (FPDFAvail_IsLinearized(avail_) == PDF_LINEARIZED) { document_ = FPDFAvail_GetDocument(avail_, nullptr); - - if (!document_) - return false; + if (!document_) { + return false; + } + int32_t nRet = PDF_DATA_NOTAVAIL; + while (nRet == PDF_DATA_NOTAVAIL) { + nRet = FPDFAvail_IsDocAvail(avail_, &hints_); + } + if (nRet == PDF_DATA_ERROR) { + return false; + } + nRet = FPDFAvail_IsFormAvail(avail_, &hints_); + if (nRet == PDF_FORM_ERROR || nRet == PDF_FORM_NOTAVAIL) { + return false; + } + int page_count = FPDF_GetPageCount(document_); + for (int i = 0; i < page_count; ++i) { + nRet = PDF_DATA_NOTAVAIL; + while (nRet == PDF_DATA_NOTAVAIL) { + nRet = FPDFAvail_IsPageAvail(avail_, i, &hints_); + } + if (nRet == PDF_DATA_ERROR) { + return false; + } + } + } else { + if (must_linearize) { + return false; + } + document_ = FPDF_LoadCustomDocument(&file_access_, nullptr); + if (!document_) { + return false; + } + } (void)FPDF_GetDocPermissions(document_); - (void)FPDFAvail_IsFormAvail(avail_, &hints_); IPDF_JSPLATFORM* platform = static_cast<IPDF_JSPLATFORM*>(this); memset(platform, 0, sizeof(IPDF_JSPLATFORM)); diff --git a/testing/embedder_test.h b/testing/embedder_test.h index fb3ea428f1..035906d6c9 100644 --- a/testing/embedder_test.h +++ b/testing/embedder_test.h @@ -80,7 +80,8 @@ class EmbedderTest : public ::testing::Test, // Open the document specified by |filename|, and create its form fill // environment, or return false on failure. - virtual bool OpenDocument(const std::string& filename); + virtual bool OpenDocument(const std::string& filename, + bool must_linearize = false); // Perform JavaScript actions that are to run at document open time. virtual void DoOpenActions(); diff --git a/testing/resources/feature_linearized_loading.pdf b/testing/resources/feature_linearized_loading.pdf Binary files differnew file mode 100644 index 0000000000..e6995c516d --- /dev/null +++ b/testing/resources/feature_linearized_loading.pdf |