diff options
Diffstat (limited to 'core/fpdfapi/fpdf_parser')
35 files changed, 10760 insertions, 0 deletions
diff --git a/core/fpdfapi/fpdf_parser/cfdf_document.cpp b/core/fpdfapi/fpdf_parser/cfdf_document.cpp new file mode 100644 index 0000000000..fd144a10a9 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cfdf_document.cpp @@ -0,0 +1,98 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cfdf_document.h" + +#include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/fpdf_serial.h" + +CFDF_Document::CFDF_Document() : CPDF_IndirectObjectHolder(NULL) { + m_pRootDict = NULL; + m_pFile = NULL; + m_bOwnFile = FALSE; +} +CFDF_Document::~CFDF_Document() { + if (m_bOwnFile && m_pFile) { + m_pFile->Release(); + } +} +CFDF_Document* CFDF_Document::CreateNewDoc() { + CFDF_Document* pDoc = new CFDF_Document; + pDoc->m_pRootDict = new CPDF_Dictionary; + pDoc->AddIndirectObject(pDoc->m_pRootDict); + CPDF_Dictionary* pFDFDict = new CPDF_Dictionary; + pDoc->m_pRootDict->SetAt("FDF", pFDFDict); + return pDoc; +} +CFDF_Document* CFDF_Document::ParseFile(IFX_FileRead* pFile, FX_BOOL bOwnFile) { + if (!pFile) { + return NULL; + } + CFDF_Document* pDoc = new CFDF_Document; + pDoc->ParseStream(pFile, bOwnFile); + if (!pDoc->m_pRootDict) { + delete pDoc; + return NULL; + } + return pDoc; +} +CFDF_Document* CFDF_Document::ParseMemory(const uint8_t* pData, FX_DWORD size) { + return CFDF_Document::ParseFile(FX_CreateMemoryStream((uint8_t*)pData, size), + TRUE); +} +void CFDF_Document::ParseStream(IFX_FileRead* pFile, FX_BOOL bOwnFile) { + m_pFile = pFile; + m_bOwnFile = bOwnFile; + CPDF_SyntaxParser parser; + parser.InitParser(m_pFile, 0); + while (1) { + bool bNumber; + CFX_ByteString word = parser.GetNextWord(&bNumber); + if (bNumber) { + FX_DWORD objnum = FXSYS_atoui(word); + word = 
parser.GetNextWord(&bNumber); + if (!bNumber) { + break; + } + word = parser.GetNextWord(nullptr); + if (word != "obj") { + break; + } + CPDF_Object* pObj = parser.GetObject(this, objnum, 0, true); + if (!pObj) { + break; + } + InsertIndirectObject(objnum, pObj); + word = parser.GetNextWord(nullptr); + if (word != "endobj") { + break; + } + } else { + if (word != "trailer") { + break; + } + if (CPDF_Dictionary* pMainDict = + ToDictionary(parser.GetObject(this, 0, 0, true))) { + m_pRootDict = pMainDict->GetDictBy("Root"); + pMainDict->Release(); + } + break; + } + } +} +FX_BOOL CFDF_Document::WriteBuf(CFX_ByteTextBuf& buf) const { + if (!m_pRootDict) { + return FALSE; + } + buf << "%FDF-1.2\r\n"; + for (const auto& pair : m_IndirectObjs) { + buf << pair.first << " 0 obj\r\n" << pair.second << "\r\nendobj\r\n\r\n"; + } + buf << "trailer\r\n<</Root " << m_pRootDict->GetObjNum() + << " 0 R>>\r\n%%EOF\r\n"; + return TRUE; +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_array.cpp b/core/fpdfapi/fpdf_parser/cpdf_array.cpp new file mode 100644 index 0000000000..3b21a45ae2 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_array.cpp @@ -0,0 +1,207 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_array.h" + +#include "core/include/fpdfapi/cpdf_name.h" +#include "core/include/fpdfapi/cpdf_number.h" +#include "core/include/fpdfapi/cpdf_reference.h" +#include "core/include/fpdfapi/cpdf_stream.h" +#include "core/include/fpdfapi/cpdf_string.h" + +CPDF_Array::CPDF_Array() {} + +CPDF_Array::~CPDF_Array() { + int size = m_Objects.GetSize(); + CPDF_Object** pList = m_Objects.GetData(); + for (int i = 0; i < size; i++) { + if (pList[i]) + pList[i]->Release(); + } +} + +CPDF_Object::Type CPDF_Array::GetType() const { + return ARRAY; +} + +CPDF_Array* CPDF_Array::GetArray() const { + // The method should be made non-const if we want to not be const. + // See bug #234. + return const_cast<CPDF_Array*>(this); +} + +bool CPDF_Array::IsArray() const { + return true; +} + +CPDF_Array* CPDF_Array::AsArray() { + return this; +} + +const CPDF_Array* CPDF_Array::AsArray() const { + return this; +} + +CPDF_Object* CPDF_Array::Clone(FX_BOOL bDirect) const { + CPDF_Array* pCopy = new CPDF_Array(); + for (int i = 0; i < GetCount(); i++) { + CPDF_Object* value = m_Objects.GetAt(i); + pCopy->m_Objects.Add(value->Clone(bDirect)); + } + return pCopy; +} + +CFX_FloatRect CPDF_Array::GetRect() { + CFX_FloatRect rect; + if (!IsArray() || m_Objects.GetSize() != 4) + return rect; + + rect.left = GetNumberAt(0); + rect.bottom = GetNumberAt(1); + rect.right = GetNumberAt(2); + rect.top = GetNumberAt(3); + return rect; +} + +CFX_Matrix CPDF_Array::GetMatrix() { + CFX_Matrix matrix; + if (!IsArray() || m_Objects.GetSize() != 6) + return matrix; + + matrix.Set(GetNumberAt(0), GetNumberAt(1), GetNumberAt(2), GetNumberAt(3), + GetNumberAt(4), GetNumberAt(5)); + return matrix; +} + +CPDF_Object* CPDF_Array::GetElement(FX_DWORD i) const { + if (i >= (FX_DWORD)m_Objects.GetSize()) + return nullptr; + return m_Objects.GetAt(i); +} + +CPDF_Object* CPDF_Array::GetElementValue(FX_DWORD i) const { + if (i >= (FX_DWORD)m_Objects.GetSize()) 
+ return nullptr; + return m_Objects.GetAt(i)->GetDirect(); +} + +CFX_ByteString CPDF_Array::GetStringAt(FX_DWORD i) const { + if (i >= (FX_DWORD)m_Objects.GetSize()) + return CFX_ByteString(); + return m_Objects.GetAt(i)->GetString(); +} + +CFX_ByteStringC CPDF_Array::GetConstStringAt(FX_DWORD i) const { + if (i >= (FX_DWORD)m_Objects.GetSize()) + return CFX_ByteStringC(); + return m_Objects.GetAt(i)->GetConstString(); +} + +int CPDF_Array::GetIntegerAt(FX_DWORD i) const { + if (i >= (FX_DWORD)m_Objects.GetSize()) + return 0; + return m_Objects.GetAt(i)->GetInteger(); +} + +FX_FLOAT CPDF_Array::GetNumberAt(FX_DWORD i) const { + if (i >= (FX_DWORD)m_Objects.GetSize()) + return 0; + return m_Objects.GetAt(i)->GetNumber(); +} + +CPDF_Dictionary* CPDF_Array::GetDictAt(FX_DWORD i) const { + CPDF_Object* p = GetElementValue(i); + if (!p) + return NULL; + if (CPDF_Dictionary* pDict = p->AsDictionary()) + return pDict; + if (CPDF_Stream* pStream = p->AsStream()) + return pStream->GetDict(); + return NULL; +} + +CPDF_Stream* CPDF_Array::GetStreamAt(FX_DWORD i) const { + return ToStream(GetElementValue(i)); +} + +CPDF_Array* CPDF_Array::GetArrayAt(FX_DWORD i) const { + return ToArray(GetElementValue(i)); +} + +void CPDF_Array::RemoveAt(FX_DWORD i, int nCount) { + if (i >= (FX_DWORD)m_Objects.GetSize()) + return; + + if (nCount <= 0 || nCount > m_Objects.GetSize() - i) + return; + + for (int j = 0; j < nCount; ++j) { + if (CPDF_Object* p = m_Objects.GetAt(i + j)) + p->Release(); + } + m_Objects.RemoveAt(i, nCount); +} + +void CPDF_Array::SetAt(FX_DWORD i, + CPDF_Object* pObj, + CPDF_IndirectObjectHolder* pObjs) { + ASSERT(IsArray()); + ASSERT(i < (FX_DWORD)m_Objects.GetSize()); + if (i >= (FX_DWORD)m_Objects.GetSize()) + return; + if (CPDF_Object* pOld = m_Objects.GetAt(i)) + pOld->Release(); + if (pObj->GetObjNum()) { + ASSERT(pObjs); + pObj = new CPDF_Reference(pObjs, pObj->GetObjNum()); + } + m_Objects.SetAt(i, pObj); +} + +void CPDF_Array::InsertAt(FX_DWORD index, + 
CPDF_Object* pObj, + CPDF_IndirectObjectHolder* pObjs) { + if (pObj->GetObjNum()) { + ASSERT(pObjs); + pObj = new CPDF_Reference(pObjs, pObj->GetObjNum()); + } + m_Objects.InsertAt(index, pObj); +} + +void CPDF_Array::Add(CPDF_Object* pObj, CPDF_IndirectObjectHolder* pObjs) { + if (pObj->GetObjNum()) { + ASSERT(pObjs); + pObj = new CPDF_Reference(pObjs, pObj->GetObjNum()); + } + m_Objects.Add(pObj); +} + +void CPDF_Array::AddName(const CFX_ByteString& str) { + ASSERT(IsArray()); + Add(new CPDF_Name(str)); +} + +void CPDF_Array::AddString(const CFX_ByteString& str) { + ASSERT(IsArray()); + Add(new CPDF_String(str, FALSE)); +} + +void CPDF_Array::AddInteger(int i) { + ASSERT(IsArray()); + Add(new CPDF_Number(i)); +} + +void CPDF_Array::AddNumber(FX_FLOAT f) { + ASSERT(IsArray()); + CPDF_Number* pNumber = new CPDF_Number(f); + Add(pNumber); +} + +void CPDF_Array::AddReference(CPDF_IndirectObjectHolder* pDoc, + FX_DWORD objnum) { + ASSERT(IsArray()); + Add(new CPDF_Reference(pDoc, objnum)); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_boolean.cpp b/core/fpdfapi/fpdf_parser/cpdf_boolean.cpp new file mode 100644 index 0000000000..75f2203954 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_boolean.cpp @@ -0,0 +1,45 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_boolean.h" + +CPDF_Boolean::CPDF_Boolean() : m_bValue(false) {} + +CPDF_Boolean::CPDF_Boolean(FX_BOOL value) : m_bValue(value) {} + +CPDF_Boolean::~CPDF_Boolean() {} + +CPDF_Object::Type CPDF_Boolean::GetType() const { + return BOOLEAN; +} + +CPDF_Object* CPDF_Boolean::Clone(FX_BOOL bDirect) const { + return new CPDF_Boolean(m_bValue); +} + +CFX_ByteString CPDF_Boolean::GetString() const { + return m_bValue ? 
"true" : "false"; +} + +int CPDF_Boolean::GetInteger() const { + return m_bValue; +} + +void CPDF_Boolean::SetString(const CFX_ByteString& str) { + m_bValue = (str == "true"); +} + +bool CPDF_Boolean::IsBoolean() const { + return true; +} + +CPDF_Boolean* CPDF_Boolean::AsBoolean() { + return this; +} + +const CPDF_Boolean* CPDF_Boolean::AsBoolean() const { + return this; +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_data_avail.cpp b/core/fpdfapi/fpdf_parser/cpdf_data_avail.cpp new file mode 100644 index 0000000000..93916cba8b --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_data_avail.cpp @@ -0,0 +1,1841 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfapi/fpdf_parser/cpdf_data_avail.h" +#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_document.h" +#include "core/include/fpdfapi/cpdf_name.h" +#include "core/include/fpdfapi/cpdf_number.h" +#include "core/include/fpdfapi/cpdf_reference.h" +#include "core/include/fpdfapi/cpdf_stream.h" +#include "core/include/fxcrt/fx_ext.h" +#include "core/include/fxcrt/fx_safe_types.h" +#include "third_party/base/stl_util.h" + +IPDF_DataAvail::IPDF_DataAvail(IPDF_DataAvail::FileAvail* pFileAvail, + IFX_FileRead* pFileRead) + : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {} + +IPDF_DataAvail::~IPDF_DataAvail() {} + +IPDF_DataAvail::FileAvail::~FileAvail() {} + +IPDF_DataAvail::DownloadHints::~DownloadHints() {} + +// static +IPDF_DataAvail* IPDF_DataAvail::Create(IPDF_DataAvail::FileAvail* pFileAvail, + IFX_FileRead* pFileRead) { + return new CPDF_DataAvail(pFileAvail, pFileRead, TRUE); +} + +// static +int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0; + 
+CPDF_DataAvail::CPDF_DataAvail(IPDF_DataAvail::FileAvail* pFileAvail, + IFX_FileRead* pFileRead, + FX_BOOL bSupportHintTable) + : IPDF_DataAvail(pFileAvail, pFileRead) { + m_Pos = 0; + m_dwFileLen = 0; + if (m_pFileRead) { + m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize(); + } + m_dwCurrentOffset = 0; + m_dwXRefOffset = 0; + m_bufferOffset = 0; + m_dwFirstPageNo = 0; + m_bufferSize = 0; + m_PagesObjNum = 0; + m_dwCurrentXRefSteam = 0; + m_dwAcroFormObjNum = 0; + m_dwInfoObjNum = 0; + m_pDocument = 0; + m_dwEncryptObjNum = 0; + m_dwPrevXRefOffset = 0; + m_dwLastXRefOffset = 0; + m_bDocAvail = FALSE; + m_bMainXRefLoadTried = FALSE; + m_bDocAvail = FALSE; + m_bLinearized = FALSE; + m_bPagesLoad = FALSE; + m_bPagesTreeLoad = FALSE; + m_bMainXRefLoadedOK = FALSE; + m_bAnnotsLoad = FALSE; + m_bHaveAcroForm = FALSE; + m_bAcroFormLoad = FALSE; + m_bPageLoadedOK = FALSE; + m_bNeedDownLoadResource = FALSE; + m_bLinearizedFormParamLoad = FALSE; + m_pLinearized = NULL; + m_pRoot = NULL; + m_pTrailer = NULL; + m_pCurrentParser = NULL; + m_pAcroForm = NULL; + m_pPageDict = NULL; + m_pPageResource = NULL; + m_docStatus = PDF_DATAAVAIL_HEADER; + m_parser.m_bOwnFileRead = false; + m_bTotalLoadPageTree = FALSE; + m_bCurPageDictLoadOK = FALSE; + m_bLinearedDataOK = FALSE; + m_bSupportHintTable = bSupportHintTable; +} +CPDF_DataAvail::~CPDF_DataAvail() { + if (m_pLinearized) + m_pLinearized->Release(); + + if (m_pRoot) + m_pRoot->Release(); + + if (m_pTrailer) + m_pTrailer->Release(); + + int iSize = m_arrayAcroforms.GetSize(); + for (int i = 0; i < iSize; ++i) + m_arrayAcroforms.GetAt(i)->Release(); +} + +void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc) { + m_pDocument = pDoc; +} + +FX_DWORD CPDF_DataAvail::GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset) { + CPDF_Parser* pParser = m_pDocument->GetParser(); + if (!pParser || !pParser->IsValidObjectNumber(objnum)) + return 0; + + if (pParser->GetObjectType(objnum) == 2) + objnum = pParser->GetObjectPositionOrZero(objnum); + + 
if (pParser->GetObjectType(objnum) != 1 && + pParser->GetObjectType(objnum) != 255) { + return 0; + } + + offset = pParser->GetObjectPositionOrZero(objnum); + if (offset == 0) + return 0; + + auto it = pParser->m_SortedOffset.find(offset); + if (it == pParser->m_SortedOffset.end() || + ++it == pParser->m_SortedOffset.end()) { + return 0; + } + return *it - offset; +} + +FX_BOOL CPDF_DataAvail::IsObjectsAvail( + CFX_ArrayTemplate<CPDF_Object*>& obj_array, + FX_BOOL bParsePage, + IPDF_DataAvail::DownloadHints* pHints, + CFX_ArrayTemplate<CPDF_Object*>& ret_array) { + if (!obj_array.GetSize()) + return TRUE; + + FX_DWORD count = 0; + CFX_ArrayTemplate<CPDF_Object*> new_obj_array; + int32_t i = 0; + for (i = 0; i < obj_array.GetSize(); i++) { + CPDF_Object* pObj = obj_array[i]; + if (!pObj) + continue; + + int32_t type = pObj->GetType(); + switch (type) { + case CPDF_Object::ARRAY: { + CPDF_Array* pArray = pObj->GetArray(); + for (FX_DWORD k = 0; k < pArray->GetCount(); ++k) + new_obj_array.Add(pArray->GetElement(k)); + } break; + case CPDF_Object::STREAM: + pObj = pObj->GetDict(); + case CPDF_Object::DICTIONARY: { + CPDF_Dictionary* pDict = pObj->GetDict(); + if (pDict && pDict->GetStringBy("Type") == "Page" && !bParsePage) + continue; + + for (const auto& it : *pDict) { + const CFX_ByteString& key = it.first; + CPDF_Object* value = it.second; + if (key != "Parent") + new_obj_array.Add(value); + } + } break; + case CPDF_Object::REFERENCE: { + CPDF_Reference* pRef = pObj->AsReference(); + FX_DWORD dwNum = pRef->GetRefObjNum(); + + FX_FILESIZE offset; + FX_DWORD size = GetObjectSize(dwNum, offset); + if (size == 0 || offset < 0 || offset >= m_dwFileLen) + break; + + if (!IsDataAvail(offset, size, pHints)) { + ret_array.Add(pObj); + count++; + } else if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) { + m_ObjectSet.insert(dwNum); + CPDF_Object* pReferred = + m_pDocument->GetIndirectObject(pRef->GetRefObjNum()); + if (pReferred) + new_obj_array.Add(pReferred); + } + } break; 
+ } + } + + if (count > 0) { + int32_t iSize = new_obj_array.GetSize(); + for (i = 0; i < iSize; ++i) { + CPDF_Object* pObj = new_obj_array[i]; + if (CPDF_Reference* pRef = pObj->AsReference()) { + FX_DWORD dwNum = pRef->GetRefObjNum(); + if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) + ret_array.Add(pObj); + } else { + ret_array.Add(pObj); + } + } + return FALSE; + } + + obj_array.RemoveAll(); + obj_array.Append(new_obj_array); + return IsObjectsAvail(obj_array, FALSE, pHints, ret_array); +} + +IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail( + IPDF_DataAvail::DownloadHints* pHints) { + if (!m_dwFileLen && m_pFileRead) { + m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize(); + if (!m_dwFileLen) + return DataError; + } + + while (!m_bDocAvail) { + if (!CheckDocStatus(pHints)) + return DataNotAvailable; + } + + return DataAvailable; +} + +FX_BOOL CPDF_DataAvail::CheckAcroFormSubObject( + IPDF_DataAvail::DownloadHints* pHints) { + if (!m_objs_array.GetSize()) { + m_objs_array.RemoveAll(); + m_ObjectSet.clear(); + CFX_ArrayTemplate<CPDF_Object*> obj_array; + obj_array.Append(m_arrayAcroforms); + FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array); + if (bRet) + m_objs_array.RemoveAll(); + return bRet; + } + + CFX_ArrayTemplate<CPDF_Object*> new_objs_array; + FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); + if (bRet) { + int32_t iSize = m_arrayAcroforms.GetSize(); + for (int32_t i = 0; i < iSize; ++i) { + m_arrayAcroforms.GetAt(i)->Release(); + } + m_arrayAcroforms.RemoveAll(); + } else { + m_objs_array.RemoveAll(); + m_objs_array.Append(new_objs_array); + } + return bRet; +} + +FX_BOOL CPDF_DataAvail::CheckAcroForm(IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist); + if (!bExist) { + m_docStatus = PDF_DATAAVAIL_PAGETREE; + return TRUE; + } + + if (!m_pAcroForm) { + if (m_docStatus == PDF_DATAAVAIL_ERROR) { + m_docStatus = 
PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + return FALSE; + } + + m_arrayAcroforms.Add(m_pAcroForm); + m_docStatus = PDF_DATAAVAIL_PAGETREE; + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckDocStatus(IPDF_DataAvail::DownloadHints* pHints) { + switch (m_docStatus) { + case PDF_DATAAVAIL_HEADER: + return CheckHeader(pHints); + case PDF_DATAAVAIL_FIRSTPAGE: + case PDF_DATAAVAIL_FIRSTPAGE_PREPARE: + return CheckFirstPage(pHints); + case PDF_DATAAVAIL_HINTTABLE: + return CheckHintTables(pHints); + case PDF_DATAAVAIL_END: + return CheckEnd(pHints); + case PDF_DATAAVAIL_CROSSREF: + return CheckCrossRef(pHints); + case PDF_DATAAVAIL_CROSSREF_ITEM: + return CheckCrossRefItem(pHints); + case PDF_DATAAVAIL_CROSSREF_STREAM: + return CheckAllCrossRefStream(pHints); + case PDF_DATAAVAIL_TRAILER: + return CheckTrailer(pHints); + case PDF_DATAAVAIL_TRAILER_APPEND: + return CheckTrailerAppend(pHints); + case PDF_DATAAVAIL_LOADALLCROSSREF: + return LoadAllXref(pHints); + case PDF_DATAAVAIL_LOADALLFILE: + return LoadAllFile(pHints); + case PDF_DATAAVAIL_ROOT: + return CheckRoot(pHints); + case PDF_DATAAVAIL_INFO: + return CheckInfo(pHints); + case PDF_DATAAVAIL_ACROFORM: + return CheckAcroForm(pHints); + case PDF_DATAAVAIL_PAGETREE: + if (m_bTotalLoadPageTree) + return CheckPages(pHints); + return LoadDocPages(pHints); + case PDF_DATAAVAIL_PAGE: + if (m_bTotalLoadPageTree) + return CheckPage(pHints); + m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD; + return TRUE; + case PDF_DATAAVAIL_ERROR: + return LoadAllFile(pHints); + case PDF_DATAAVAIL_PAGE_LATERLOAD: + m_docStatus = PDF_DATAAVAIL_PAGE; + default: + m_bDocAvail = TRUE; + return TRUE; + } +} + +FX_BOOL CPDF_DataAvail::CheckPageStatus(IPDF_DataAvail::DownloadHints* pHints) { + switch (m_docStatus) { + case PDF_DATAAVAIL_PAGETREE: + return CheckPages(pHints); + case PDF_DATAAVAIL_PAGE: + return CheckPage(pHints); + case PDF_DATAAVAIL_ERROR: + return LoadAllFile(pHints); + default: + m_bPagesTreeLoad = TRUE; + m_bPagesLoad = TRUE; 
+ return TRUE; + } +} + +FX_BOOL CPDF_DataAvail::LoadAllFile(IPDF_DataAvail::DownloadHints* pHints) { + if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) { + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; + } + + pHints->AddSegment(0, (FX_DWORD)m_dwFileLen); + return FALSE; +} + +FX_BOOL CPDF_DataAvail::LoadAllXref(IPDF_DataAvail::DownloadHints* pHints) { + m_parser.m_pSyntax->InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset); + m_parser.m_bOwnFileRead = false; + if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && + !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return FALSE; + } + + m_dwRootObjNum = m_parser.GetRootObjNum(); + m_dwInfoObjNum = m_parser.GetInfoObjNum(); + m_pCurrentParser = &m_parser; + m_docStatus = PDF_DATAAVAIL_ROOT; + return TRUE; +} + +CPDF_Object* CPDF_DataAvail::GetObject(FX_DWORD objnum, + IPDF_DataAvail::DownloadHints* pHints, + FX_BOOL* pExistInFile) { + CPDF_Object* pRet = nullptr; + FX_DWORD size = 0; + FX_FILESIZE offset = 0; + CPDF_Parser* pParser = nullptr; + + if (pExistInFile) + *pExistInFile = TRUE; + + if (m_pDocument) { + size = GetObjectSize(objnum, offset); + pParser = m_pDocument->GetParser(); + } else { + size = (FX_DWORD)m_parser.GetObjectSize(objnum); + offset = m_parser.GetObjectOffset(objnum); + pParser = &m_parser; + } + + if (!IsDataAvail(offset, size, pHints)) + return nullptr; + + if (pParser) + pRet = pParser->ParseIndirectObject(nullptr, objnum); + + if (!pRet && pExistInFile) + *pExistInFile = FALSE; + + return pRet; +} + +FX_BOOL CPDF_DataAvail::CheckInfo(IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + CPDF_Object* pInfo = GetObject(m_dwInfoObjNum, pHints, &bExist); + if (!bExist) { + m_docStatus = + (m_bHaveAcroForm ? 
PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE); + return TRUE; + } + + if (!pInfo) { + if (m_docStatus == PDF_DATAAVAIL_ERROR) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + + if (m_Pos == m_dwFileLen) + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (pInfo) + pInfo->Release(); + + m_docStatus = + (m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE); + + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckRoot(IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist); + if (!bExist) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + + if (!m_pRoot) { + if (m_docStatus == PDF_DATAAVAIL_ERROR) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + return FALSE; + } + + CPDF_Dictionary* pDict = m_pRoot->GetDict(); + if (!pDict) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + CPDF_Reference* pRef = ToReference(pDict->GetElement("Pages")); + if (!pRef) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + m_PagesObjNum = pRef->GetRefObjNum(); + CPDF_Reference* pAcroFormRef = + ToReference(m_pRoot->GetDict()->GetElement("AcroForm")); + if (pAcroFormRef) { + m_bHaveAcroForm = TRUE; + m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum(); + } + + if (m_dwInfoObjNum) { + m_docStatus = PDF_DATAAVAIL_INFO; + } else { + m_docStatus = + m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE; + } + return TRUE; +} + +FX_BOOL CPDF_DataAvail::PreparePageItem() { + CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); + CPDF_Reference* pRef = + ToReference(pRoot ? 
pRoot->GetElement("Pages") : nullptr); + if (!pRef) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + m_PagesObjNum = pRef->GetRefObjNum(); + m_pCurrentParser = m_pDocument->GetParser(); + m_docStatus = PDF_DATAAVAIL_PAGETREE; + return TRUE; +} + +bool CPDF_DataAvail::IsFirstCheck(int iPage) { + return m_pageMapCheckState.insert(iPage).second; +} + +void CPDF_DataAvail::ResetFirstCheck(int iPage) { + m_pageMapCheckState.erase(iPage); +} + +FX_BOOL CPDF_DataAvail::CheckPage(IPDF_DataAvail::DownloadHints* pHints) { + FX_DWORD iPageObjs = m_PageObjList.GetSize(); + CFX_DWordArray UnavailObjList; + for (FX_DWORD i = 0; i < iPageObjs; ++i) { + FX_DWORD dwPageObjNum = m_PageObjList.GetAt(i); + FX_BOOL bExist = FALSE; + CPDF_Object* pObj = GetObject(dwPageObjNum, pHints, &bExist); + if (!pObj) { + if (bExist) + UnavailObjList.Add(dwPageObjNum); + continue; + } + + if (pObj->IsArray()) { + CPDF_Array* pArray = pObj->GetArray(); + if (pArray) { + int32_t iSize = pArray->GetCount(); + for (int32_t j = 0; j < iSize; ++j) { + if (CPDF_Reference* pRef = ToReference(pArray->GetElement(j))) + UnavailObjList.Add(pRef->GetRefObjNum()); + } + } + } + + if (!pObj->IsDictionary()) { + pObj->Release(); + continue; + } + + CFX_ByteString type = pObj->GetDict()->GetStringBy("Type"); + if (type == "Pages") { + m_PagesArray.Add(pObj); + continue; + } + pObj->Release(); + } + + m_PageObjList.RemoveAll(); + if (UnavailObjList.GetSize()) { + m_PageObjList.Append(UnavailObjList); + return FALSE; + } + + FX_DWORD iPages = m_PagesArray.GetSize(); + for (FX_DWORD i = 0; i < iPages; i++) { + CPDF_Object* pPages = m_PagesArray.GetAt(i); + if (!pPages) + continue; + + if (!GetPageKids(m_pCurrentParser, pPages)) { + pPages->Release(); + while (++i < iPages) { + pPages = m_PagesArray.GetAt(i); + pPages->Release(); + } + m_PagesArray.RemoveAll(); + + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + pPages->Release(); + } + + m_PagesArray.RemoveAll(); + if (!m_PageObjList.GetSize()) 
+ m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; +} + +FX_BOOL CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) { + if (!pParser) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + CPDF_Dictionary* pDict = pPages->GetDict(); + CPDF_Object* pKids = pDict ? pDict->GetElement("Kids") : NULL; + if (!pKids) + return TRUE; + + switch (pKids->GetType()) { + case CPDF_Object::REFERENCE: + m_PageObjList.Add(pKids->AsReference()->GetRefObjNum()); + break; + case CPDF_Object::ARRAY: { + CPDF_Array* pKidsArray = pKids->AsArray(); + for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) { + if (CPDF_Reference* pRef = ToReference(pKidsArray->GetElement(i))) + m_PageObjList.Add(pRef->GetRefObjNum()); + } + } break; + default: + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckPages(IPDF_DataAvail::DownloadHints* pHints) { + FX_BOOL bExist = FALSE; + CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist); + if (!bExist) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + + if (!pPages) { + if (m_docStatus == PDF_DATAAVAIL_ERROR) { + m_docStatus = PDF_DATAAVAIL_LOADALLFILE; + return TRUE; + } + return FALSE; + } + + if (!GetPageKids(m_pCurrentParser, pPages)) { + pPages->Release(); + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + pPages->Release(); + m_docStatus = PDF_DATAAVAIL_PAGE; + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckHeader(IPDF_DataAvail::DownloadHints* pHints) { + FX_DWORD req_size = 1024; + if ((FX_FILESIZE)req_size > m_dwFileLen) + req_size = (FX_DWORD)m_dwFileLen; + + if (m_pFileAvail->IsDataAvail(0, req_size)) { + uint8_t buffer[1024]; + m_pFileRead->ReadBlock(buffer, 0, req_size); + + if (IsLinearizedFile(buffer, req_size)) { + m_docStatus = PDF_DATAAVAIL_FIRSTPAGE; + } else { + if (m_docStatus == PDF_DATAAVAIL_ERROR) + return FALSE; + m_docStatus = PDF_DATAAVAIL_END; + } + return TRUE; + } + + pHints->AddSegment(0, req_size); + return 
FALSE; +} + +FX_BOOL CPDF_DataAvail::CheckFirstPage(IPDF_DataAvail::DownloadHints* pHints) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + CPDF_Object* pEndOffSet = pDict ? pDict->GetElement("E") : NULL; + if (!pEndOffSet) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + CPDF_Object* pXRefOffset = pDict ? pDict->GetElement("T") : NULL; + if (!pXRefOffset) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + CPDF_Object* pFileLen = pDict ? pDict->GetElement("L") : NULL; + if (!pFileLen) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + FX_BOOL bNeedDownLoad = FALSE; + if (pEndOffSet->IsNumber()) { + FX_DWORD dwEnd = pEndOffSet->GetInteger(); + dwEnd += 512; + if ((FX_FILESIZE)dwEnd > m_dwFileLen) + dwEnd = (FX_DWORD)m_dwFileLen; + + int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); + int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; + if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { + pHints->AddSegment(iStartPos, iSize); + bNeedDownLoad = TRUE; + } + } + + m_dwLastXRefOffset = 0; + FX_FILESIZE dwFileLen = 0; + if (pXRefOffset->IsNumber()) + m_dwLastXRefOffset = pXRefOffset->GetInteger(); + + if (pFileLen->IsNumber()) + dwFileLen = pFileLen->GetInteger(); + + if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, + (FX_DWORD)(dwFileLen - m_dwLastXRefOffset))) { + if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) { + FX_DWORD dwSize = (FX_DWORD)(dwFileLen - m_dwLastXRefOffset); + FX_FILESIZE offset = m_dwLastXRefOffset; + if (dwSize < 512 && dwFileLen > 512) { + dwSize = 512; + offset = dwFileLen - 512; + } + pHints->AddSegment(offset, dwSize); + } + } else { + m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; + } + + if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) { + m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; + return FALSE; + } + + m_docStatus = + m_bSupportHintTable ? 
PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE; + return TRUE; +} + +FX_BOOL CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, + FX_DWORD size, + IPDF_DataAvail::DownloadHints* pHints) { + if (offset > m_dwFileLen) + return TRUE; + + FX_SAFE_DWORD safeSize = pdfium::base::checked_cast<FX_DWORD>(offset); + safeSize += size; + safeSize += 512; + if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen) + size = m_dwFileLen - offset; + else + size += 512; + + if (!m_pFileAvail->IsDataAvail(offset, size)) { + pHints->AddSegment(offset, size); + return FALSE; + } + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckHintTables(IPDF_DataAvail::DownloadHints* pHints) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + if (!pDict) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (!pDict->KeyExist("H") || !pDict->KeyExist("O") || !pDict->KeyExist("N")) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + int nPageCount = pDict->GetElementValue("N")->GetInteger(); + if (nPageCount <= 1) { + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; + } + + CPDF_Array* pHintStreamRange = pDict->GetArrayBy("H"); + FX_FILESIZE szHSStart = + pHintStreamRange->GetElementValue(0) + ? pHintStreamRange->GetElementValue(0)->GetInteger() + : 0; + FX_FILESIZE szHSLength = + pHintStreamRange->GetElementValue(1) + ? 
pHintStreamRange->GetElementValue(1)->GetInteger() + : 0; + if (szHSStart < 0 || szHSLength <= 0) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return FALSE; + } + + if (!IsDataAvail(szHSStart, szHSLength, pHints)) + return FALSE; + + m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset); + + std::unique_ptr<CPDF_HintTables> pHintTables( + new CPDF_HintTables(this, pDict)); + std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pHintStream( + ParseIndirectObjectAt(szHSStart, 0)); + CPDF_Stream* pStream = ToStream(pHintStream.get()); + if (pStream && pHintTables->LoadHintStream(pStream)) + m_pHintTables = std::move(pHintTables); + + m_docStatus = PDF_DATAAVAIL_DONE; + return TRUE; +} + +CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt( + FX_FILESIZE pos, + FX_DWORD objnum, + CPDF_IndirectObjectHolder* pObjList) { + FX_FILESIZE SavedPos = m_syntaxParser.SavePos(); + m_syntaxParser.RestorePos(pos); + + bool bIsNumber; + CFX_ByteString word = m_syntaxParser.GetNextWord(&bIsNumber); + if (!bIsNumber) + return nullptr; + + FX_DWORD parser_objnum = FXSYS_atoui(word); + if (objnum && parser_objnum != objnum) + return nullptr; + + word = m_syntaxParser.GetNextWord(&bIsNumber); + if (!bIsNumber) + return nullptr; + + FX_DWORD gennum = FXSYS_atoui(word); + if (m_syntaxParser.GetKeyword() != "obj") { + m_syntaxParser.RestorePos(SavedPos); + return nullptr; + } + + CPDF_Object* pObj = + m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, true); + m_syntaxParser.RestorePos(SavedPos); + return pObj; +} + +IPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() { + FX_DWORD req_size = 1024; + if (!m_pFileAvail->IsDataAvail(0, req_size)) + return LinearizationUnknown; + + if (!m_pFileRead) + return NotLinearized; + + FX_FILESIZE dwSize = m_pFileRead->GetSize(); + if (dwSize < (FX_FILESIZE)req_size) + return LinearizationUnknown; + + uint8_t buffer[1024]; + m_pFileRead->ReadBlock(buffer, 0, req_size); + if (IsLinearizedFile(buffer, req_size)) + return 
Linearized;
+
+  return NotLinearized;
+}
+// Parses the indirect object that follows the file header inside the
+// in-memory block |pData| (|dwLen| bytes) and stores it in |m_pLinearized|,
+// releasing any previously stored object first.
+// Returns TRUE only when that object is a dictionary with a /Linearized entry
+// whose /L value equals the size reported by |m_pFileRead|; in that case
+// |m_bLinearized| is set and, if /P is a number, |m_dwFirstPageNo| as well.
+// Sets |m_docStatus| to PDF_DATAAVAIL_ERROR if no header offset is found.
+FX_BOOL CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen) {
+  ScopedFileStream file(FX_CreateMemoryStream(pData, (size_t)dwLen, FALSE));
+
+  int32_t offset = GetHeaderOffset(file.get());
+  if (offset == -1) {
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+    return FALSE;
+  }
+
+  m_dwHeaderOffset = offset;
+  m_syntaxParser.InitParser(file.get(), offset);
+  // NOTE(review): the +9 presumably skips the "%PDF-x.y" header text and its
+  // line ending - confirm against the header format.
+  m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9);
+
+  bool bNumber;
+  CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber);
+  if (!bNumber)
+    return FALSE;
+
+  FX_DWORD objnum = FXSYS_atoui(wordObjNum);
+  if (m_pLinearized) {
+    m_pLinearized->Release();
+    m_pLinearized = nullptr;
+  }
+
+  m_pLinearized =
+      ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum);
+  if (!m_pLinearized)
+    return FALSE;
+
+  CPDF_Dictionary* pDict = m_pLinearized->GetDict();
+  if (pDict && pDict->GetElement("Linearized")) {
+    CPDF_Object* pLen = pDict->GetElement("L");
+    if (!pLen)
+      return FALSE;
+
+    // /L must match the size of the file being read.
+    if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize())
+      return FALSE;
+
+    m_bLinearized = TRUE;
+
+    if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P")))
+      m_dwFirstPageNo = pNo->GetInteger();
+
+    return TRUE;
+  }
+  return FALSE;
+}
+
+// Inspects the last 1024 bytes of the file (the whole file if it is shorter)
+// for the "startxref" keyword and the number that follows it.
+// - Range not downloaded yet: adds it to |pHints| and returns FALSE.
+// - Keyword found with a non-zero offset not beyond the file length: stores
+//   the offset in |m_dwXRefOffset| / |m_dwLastXRefOffset|, moves |m_Pos| there
+//   and advances |m_docStatus| to PDF_DATAAVAIL_CROSSREF.
+// - Keyword missing, or offset zero / beyond the file: switches
+//   |m_docStatus| to PDF_DATAAVAIL_LOADALLFILE.
+// - Keyword found but not followed by a number: PDF_DATAAVAIL_ERROR, FALSE.
+FX_BOOL CPDF_DataAvail::CheckEnd(IPDF_DataAvail::DownloadHints* pHints) {
+  FX_DWORD req_pos = (FX_DWORD)(m_dwFileLen > 1024 ?
m_dwFileLen - 1024 : 0);
+  FX_DWORD dwSize = (FX_DWORD)(m_dwFileLen - req_pos);
+
+  if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) {
+    uint8_t buffer[1024];
+    m_pFileRead->ReadBlock(buffer, req_pos, dwSize);
+
+    ScopedFileStream file(FX_CreateMemoryStream(buffer, (size_t)dwSize, FALSE));
+    m_syntaxParser.InitParser(file.get(), 0);
+    // Start at the last byte of the block before searching for the keyword.
+    m_syntaxParser.RestorePos(dwSize - 1);
+
+    if (m_syntaxParser.SearchWord("startxref", TRUE, FALSE, dwSize)) {
+      // Consume the "startxref" keyword itself.
+      m_syntaxParser.GetNextWord(nullptr);
+
+      bool bNumber;
+      CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber);
+      if (!bNumber) {
+        m_docStatus = PDF_DATAAVAIL_ERROR;
+        return FALSE;
+      }
+
+      m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
+      if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) {
+        m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
+        return TRUE;
+      }
+
+      m_dwLastXRefOffset = m_dwXRefOffset;
+      SetStartOffset(m_dwXRefOffset);
+      m_docStatus = PDF_DATAAVAIL_CROSSREF;
+      return TRUE;
+    }
+
+    m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
+    return TRUE;
+  }
+
+  pHints->AddSegment(req_pos, dwSize);
+  return FALSE;
+}
+
+// Tries to parse a cross-reference stream object from the data between
+// |m_dwCurrentXRefSteam| and up to 512 bytes past |m_Pos|.
+// Returns 1 if an object whose /Type is "XRef" was parsed (|xref_offset|
+// receives its /Prev value), 0 if more data is needed, and -1 if the data is
+// not a cross-reference stream. |xref_offset| is reset to 0 on entry.
+int32_t CPDF_DataAvail::CheckCrossRefStream(
+    IPDF_DataAvail::DownloadHints* pHints,
+    FX_FILESIZE& xref_offset) {
+  xref_offset = 0;
+  FX_DWORD req_size =
+      (FX_DWORD)(m_Pos + 512 > m_dwFileLen ?
m_dwFileLen - m_Pos : 512); + + if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) { + int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam); + CFX_BinaryBuf buf(iSize); + uint8_t* pBuf = buf.GetBuffer(); + + m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize); + + ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE)); + m_parser.m_pSyntax->InitParser(file.get(), 0); + + bool bNumber; + CFX_ByteString objnum = m_parser.m_pSyntax->GetNextWord(&bNumber); + if (!bNumber) + return -1; + + FX_DWORD objNum = FXSYS_atoui(objnum); + CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(nullptr, 0, objNum); + if (!pObj) { + m_Pos += m_parser.m_pSyntax->SavePos(); + return 0; + } + + CPDF_Dictionary* pDict = pObj->GetDict(); + CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr); + if (pName) { + if (pName->GetString() == "XRef") { + m_Pos += m_parser.m_pSyntax->SavePos(); + xref_offset = pObj->GetDict()->GetIntegerBy("Prev"); + pObj->Release(); + return 1; + } + } + pObj->Release(); + return -1; + } + pHints->AddSegment(m_Pos, req_size); + return 0; +} + +inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) { + m_Pos = dwOffset; +} + +FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString& token) { + uint8_t ch; + if (!GetNextChar(ch)) + return FALSE; + + while (1) { + while (PDFCharIsWhitespace(ch)) { + if (!GetNextChar(ch)) + return FALSE; + } + + if (ch != '%') + break; + + while (1) { + if (!GetNextChar(ch)) + return FALSE; + if (PDFCharIsLineEnding(ch)) + break; + } + } + + uint8_t buffer[256]; + FX_DWORD index = 0; + if (PDFCharIsDelimiter(ch)) { + buffer[index++] = ch; + if (ch == '/') { + while (1) { + if (!GetNextChar(ch)) + return FALSE; + + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { + m_Pos--; + CFX_ByteString ret(buffer, index); + token = ret; + return TRUE; + } + + if (index < sizeof(buffer)) + buffer[index++] = ch; + } + } else if (ch == '<') { + if (!GetNextChar(ch)) + return FALSE; + + if 
(ch == '<') + buffer[index++] = ch; + else + m_Pos--; + } else if (ch == '>') { + if (!GetNextChar(ch)) + return FALSE; + + if (ch == '>') + buffer[index++] = ch; + else + m_Pos--; + } + + CFX_ByteString ret(buffer, index); + token = ret; + return TRUE; + } + + while (1) { + if (index < sizeof(buffer)) + buffer[index++] = ch; + + if (!GetNextChar(ch)) + return FALSE; + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { + m_Pos--; + break; + } + } + + token = CFX_ByteString(buffer, index); + return TRUE; +} + +FX_BOOL CPDF_DataAvail::GetNextChar(uint8_t& ch) { + FX_FILESIZE pos = m_Pos; + if (pos >= m_dwFileLen) + return FALSE; + + if (m_bufferOffset >= pos || + (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) { + FX_FILESIZE read_pos = pos; + FX_DWORD read_size = 512; + if ((FX_FILESIZE)read_size > m_dwFileLen) + read_size = (FX_DWORD)m_dwFileLen; + + if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen) + read_pos = m_dwFileLen - read_size; + + if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size)) + return FALSE; + + m_bufferOffset = read_pos; + m_bufferSize = read_size; + } + ch = m_bufferData[pos - m_bufferOffset]; + m_Pos++; + return TRUE; +} + +FX_BOOL CPDF_DataAvail::CheckCrossRefItem( + IPDF_DataAvail::DownloadHints* pHints) { + int32_t iSize = 0; + CFX_ByteString token; + while (1) { + if (!GetNextToken(token)) { + iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? 
m_dwFileLen - m_Pos : 512);
+      pHints->AddSegment(m_Pos, iSize);
+      return FALSE;
+    }
+
+    if (token == "trailer") {
+      m_dwTrailerOffset = m_Pos;
+      m_docStatus = PDF_DATAAVAIL_TRAILER;
+      return TRUE;
+    }
+  }
+}
+
+// Processes one cross-reference stream via CheckCrossRefStream().
+// On success either follows the stream's /Prev offset (updating
+// |m_dwCurrentXRefSteam| and |m_Pos|) or, when there is no previous stream,
+// advances |m_docStatus| to PDF_DATAAVAIL_LOADALLCROSSREF.
+// Returns FALSE when more data is needed or on error (status set to
+// PDF_DATAAVAIL_ERROR).
+FX_BOOL CPDF_DataAvail::CheckAllCrossRefStream(
+    IPDF_DataAvail::DownloadHints* pHints) {
+  FX_FILESIZE xref_offset = 0;
+
+  int32_t nRet = CheckCrossRefStream(pHints, xref_offset);
+  if (nRet == 1) {
+    if (!xref_offset) {
+      m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
+    } else {
+      m_dwCurrentXRefSteam = xref_offset;
+      m_Pos = xref_offset;
+    }
+    return TRUE;
+  }
+
+  if (nRet == -1)
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+  return FALSE;
+}
+
+// Examines the token at |m_Pos|. If it is "xref", scans forward for the
+// "trailer" keyword and, once found, records |m_dwTrailerOffset| and advances
+// |m_docStatus| to PDF_DATAAVAIL_TRAILER. Any other first token switches
+// |m_docStatus| to PDF_DATAAVAIL_LOADALLFILE.
+// When the data runs out, up to 512 more bytes at |m_Pos| are requested
+// through |pHints| and FALSE is returned; if that happens inside the table,
+// |m_docStatus| becomes PDF_DATAAVAIL_CROSSREF_ITEM.
+FX_BOOL CPDF_DataAvail::CheckCrossRef(IPDF_DataAvail::DownloadHints* pHints) {
+  int32_t iSize = 0;
+  CFX_ByteString token;
+  if (!GetNextToken(token)) {
+    iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
+    pHints->AddSegment(m_Pos, iSize);
+    return FALSE;
+  }
+
+  if (token == "xref") {
+    while (1) {
+      if (!GetNextToken(token)) {
+        iSize =
+            (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
+        pHints->AddSegment(m_Pos, iSize);
+        m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM;
+        return FALSE;
+      }
+
+      if (token == "trailer") {
+        m_dwTrailerOffset = m_Pos;
+        m_docStatus = PDF_DATAAVAIL_TRAILER;
+        return TRUE;
+      }
+    }
+  } else {
+    m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
+    return TRUE;
+  }
+  // Not reached: both branches above return.
+  return FALSE;
+}
+
+// Runs after a trailer has been parsed. If |m_Pos| is still inside the file,
+// first makes sure up to 512 bytes at |m_Pos| plus the syntax parser's
+// current position are available (requesting them through |pHints|
+// otherwise). Then either restarts cross-reference parsing at
+// |m_dwPrevXRefOffset| (PDF_DATAAVAIL_CROSSREF) or, when there is no previous
+// table, advances to PDF_DATAAVAIL_LOADALLCROSSREF.
+FX_BOOL CPDF_DataAvail::CheckTrailerAppend(
+    IPDF_DataAvail::DownloadHints* pHints) {
+  if (m_Pos < m_dwFileLen) {
+    FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos();
+    int32_t iSize = (int32_t)(
+        dwAppendPos + 512 > m_dwFileLen ?
m_dwFileLen - dwAppendPos : 512);
+
+    if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) {
+      pHints->AddSegment(dwAppendPos, iSize);
+      return FALSE;
+    }
+  }
+
+  if (m_dwPrevXRefOffset) {
+    SetStartOffset(m_dwPrevXRefOffset);
+    m_docStatus = PDF_DATAAVAIL_CROSSREF;
+  } else {
+    m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
+  }
+  return TRUE;
+}
+
+// Parses the trailer dictionary from the bytes between |m_dwTrailerOffset|
+// and up to 512 bytes past |m_Pos|.
+// - Data missing or trailer incomplete: requests more through |pHints| and
+//   returns FALSE.
+// - /Encrypt is a reference: switches to PDF_DATAAVAIL_LOADALLFILE.
+// - /Prev present: falls back to PDF_DATAAVAIL_LOADALLFILE if /XRefStm is
+//   also present or /Prev points at or beyond the end of the file; otherwise
+//   moves |m_Pos| to /Prev and continues with PDF_DATAAVAIL_TRAILER_APPEND.
+// - No /Prev: clears |m_dwPrevXRefOffset| and continues with
+//   PDF_DATAAVAIL_TRAILER_APPEND.
+FX_BOOL CPDF_DataAvail::CheckTrailer(IPDF_DataAvail::DownloadHints* pHints) {
+  int32_t iTrailerSize =
+      (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
+  if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) {
+    int32_t iSize = (int32_t)(m_Pos + iTrailerSize - m_dwTrailerOffset);
+    CFX_BinaryBuf buf(iSize);
+    uint8_t* pBuf = buf.GetBuffer();
+    if (!pBuf) {
+      m_docStatus = PDF_DATAAVAIL_ERROR;
+      return FALSE;
+    }
+
+    if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize))
+      return FALSE;
+
+    ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
+    m_syntaxParser.InitParser(file.get(), 0);
+
+    std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pTrailer(
+        m_syntaxParser.GetObject(nullptr, 0, 0, true));
+    if (!pTrailer) {
+      // Trailer not complete yet: advance and ask for the next chunk.
+      m_Pos += m_syntaxParser.SavePos();
+      pHints->AddSegment(m_Pos, iTrailerSize);
+      return FALSE;
+    }
+
+    // NOTE(review): this failure path leaves |m_docStatus| unchanged and adds
+    // no hint - confirm that callers cannot stall here.
+    if (!pTrailer->IsDictionary())
+      return FALSE;
+
+    CPDF_Dictionary* pTrailerDict = pTrailer->GetDict();
+    CPDF_Object* pEncrypt = pTrailerDict->GetElement("Encrypt");
+    if (ToReference(pEncrypt)) {
+      m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
+      return TRUE;
+    }
+
+    FX_DWORD xrefpos = GetDirectInteger(pTrailerDict, "Prev");
+    if (xrefpos) {
+      m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm");
+      if (m_dwPrevXRefOffset) {
+        m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
+      } else {
+        m_dwPrevXRefOffset = xrefpos;
+        if (m_dwPrevXRefOffset >= m_dwFileLen) {
+          m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
+        } else {
+          SetStartOffset(m_dwPrevXRefOffset);
+          m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
+        }
+      }
+      return TRUE;
+    }
+    m_dwPrevXRefOffset = 0;
+    m_docStatus =
PDF_DATAAVAIL_TRAILER_APPEND;
+    return TRUE;
+  }
+  pHints->AddSegment(m_Pos, iTrailerSize);
+  return FALSE;
+}
+
+// Drives the page-tree part of the state machine for page |iPage|:
+// PDF_DATAAVAIL_PAGETREE loads the root of the page tree, PDF_DATAAVAIL_PAGE
+// locates the requested page, PDF_DATAAVAIL_ERROR falls back to loading the
+// whole file. Any other status marks the page tree and page dictionary as
+// loaded, resets |m_docStatus| to PDF_DATAAVAIL_PAGE and returns TRUE.
+// Returns FALSE while data is still missing.
+FX_BOOL CPDF_DataAvail::CheckPage(int32_t iPage,
+                                  IPDF_DataAvail::DownloadHints* pHints) {
+  while (TRUE) {
+    switch (m_docStatus) {
+      case PDF_DATAAVAIL_PAGETREE:
+        if (!LoadDocPages(pHints))
+          return FALSE;
+        break;
+      case PDF_DATAAVAIL_PAGE:
+        if (!LoadDocPage(iPage, pHints))
+          return FALSE;
+        break;
+      case PDF_DATAAVAIL_ERROR:
+        return LoadAllFile(pHints);
+      default:
+        m_bPagesTreeLoad = TRUE;
+        m_bPagesLoad = TRUE;
+        m_bCurPageDictLoadOK = TRUE;
+        m_docStatus = PDF_DATAAVAIL_PAGE;
+        return TRUE;
+    }
+  }
+}
+
+// Resolves a page-tree node whose object |dwPageNo| is an array of kids.
+// On success |pPageNode| becomes a PDF_PAGENODE_PAGES node with one child
+// (of still unknown type) per reference in the array; non-reference entries
+// are skipped.
+// Returns FALSE if the object is not available yet, or - with |m_docStatus|
+// set to PDF_DATAAVAIL_ERROR - if it does not exist or is not an array.
+FX_BOOL CPDF_DataAvail::CheckArrayPageNode(
+    FX_DWORD dwPageNo,
+    CPDF_DataAvail::PageNode* pPageNode,
+    IPDF_DataAvail::DownloadHints* pHints) {
+  FX_BOOL bExist = FALSE;
+  // Owning holder: the object is released on every return path.
+  std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pPages(
+      GetObject(dwPageNo, pHints, &bExist));
+  if (!bExist) {
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+    return FALSE;
+  }
+
+  // Object exists but is not available yet; |m_docStatus| is left as is.
+  if (!pPages)
+    return FALSE;
+
+  CPDF_Array* pArray = pPages->AsArray();
+  if (!pArray) {
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+    return FALSE;
+  }
+
+  pPageNode->m_type = PDF_PAGENODE_PAGES;
+  for (FX_DWORD i = 0; i < pArray->GetCount(); ++i) {
+    CPDF_Reference* pKid = ToReference(pArray->GetElement(i));
+    if (!pKid)
+      continue;
+
+    PageNode* pNode = new PageNode();
+    pPageNode->m_childNode.Add(pNode);
+    pNode->m_dwPageNo = pKid->GetRefObjNum();
+  }
+  return TRUE;
+}
+
+// Determines the type of the page-tree node stored in object |dwPageNo| and
+// records it in |pPageNode|:
+// - an array becomes PDF_PAGENODE_ARRAY (expanded later by
+//   CheckArrayPageNode());
+// - a dictionary with /Type "Pages" becomes PDF_PAGENODE_PAGES, with one
+//   child of unknown type per reference found in /Kids (a /Pages node without
+//   /Kids sets |m_docStatus| to PDF_DATAAVAIL_PAGE);
+// - a dictionary with /Type "Page" becomes PDF_PAGENODE_PAGE.
+// Returns FALSE if the object is not available yet, or - with |m_docStatus|
+// set to PDF_DATAAVAIL_ERROR - if it does not exist or has another type.
+FX_BOOL CPDF_DataAvail::CheckUnkownPageNode(
+    FX_DWORD dwPageNo,
+    CPDF_DataAvail::PageNode* pPageNode,
+    IPDF_DataAvail::DownloadHints* pHints) {
+  FX_BOOL bExist = FALSE;
+  // Owning holder: guarantees the object is released on every return path,
+  // including the early return for a /Pages node without /Kids.
+  std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pPage(
+      GetObject(dwPageNo, pHints, &bExist));
+  if (!bExist) {
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+    return FALSE;
+  }
+
+  // Object exists but is not available yet; |m_docStatus| is left as is.
+  if (!pPage)
+    return FALSE;
+
+  if (pPage->IsArray()) {
+    pPageNode->m_dwPageNo = dwPageNo;
+    pPageNode->m_type = PDF_PAGENODE_ARRAY;
+    return TRUE;
+  }
+
+  if (!pPage->IsDictionary()) {
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+    return FALSE;
+  }
+
+  pPageNode->m_dwPageNo = dwPageNo;
+  CPDF_Dictionary* pDict = pPage->GetDict();
+  CFX_ByteString type = pDict->GetStringBy("Type");
+  if (type == "Pages") {
+    pPageNode->m_type = PDF_PAGENODE_PAGES;
+    CPDF_Object* pKids = pDict->GetElement("Kids");
+    if (!pKids) {
+      m_docStatus = PDF_DATAAVAIL_PAGE;
+      return TRUE;
+    }
+
+    switch (pKids->GetType()) {
+      case CPDF_Object::REFERENCE: {
+        CPDF_Reference* pKid = pKids->AsReference();
+        PageNode* pNode = new PageNode();
+        pPageNode->m_childNode.Add(pNode);
+        pNode->m_dwPageNo = pKid->GetRefObjNum();
+      } break;
+      case CPDF_Object::ARRAY: {
+        CPDF_Array* pKidsArray = pKids->AsArray();
+        for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
+          CPDF_Reference* pKid = ToReference(pKidsArray->GetElement(i));
+          if (!pKid)
+            continue;
+
+          PageNode* pNode = new PageNode();
+          pPageNode->m_childNode.Add(pNode);
+          pNode->m_dwPageNo = pKid->GetRefObjNum();
+        }
+      } break;
+      default:
+        break;
+    }
+  } else if (type == "Page") {
+    pPageNode->m_type = PDF_PAGENODE_PAGE;
+  } else {
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+    return FALSE;
+  }
+  return TRUE;
+}
+
+// Walks the children of |pageNodes| looking for page number |iPage|.
+// |iCount| is the running index of the last leaf page seen and is shared
+// across the recursion; |level| is the current depth, limited by
+// kMaxPageRecursionDepth. Nodes of unknown or array type are resolved on
+// demand and then examined again.
+// Sets |m_docStatus| to PDF_DATAAVAIL_DONE once the page has been counted.
+// Returns FALSE when data is missing, on error, or when the depth limit is
+// reached.
+FX_BOOL CPDF_DataAvail::CheckPageNode(CPDF_DataAvail::PageNode& pageNodes,
+                                      int32_t iPage,
+                                      int32_t& iCount,
+                                      IPDF_DataAvail::DownloadHints* pHints,
+                                      int level) {
+  if (level >= kMaxPageRecursionDepth)
+    return FALSE;
+
+  int32_t iSize = pageNodes.m_childNode.GetSize();
+  if (iSize <= 0 || iPage >= iSize) {
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+    return FALSE;
+  }
+
+  for (int32_t i = 0; i < iSize; ++i) {
+    PageNode* pNode = pageNodes.m_childNode.GetAt(i);
+    if (!pNode)
+      continue;
+
+    switch (pNode->m_type) {
+      case PDF_PAGENODE_UNKNOWN:
+        if (!CheckUnkownPageNode(pNode->m_dwPageNo, pNode,
pHints)) {
+          return FALSE;
+        }
+        // The node type is known now; visit the same index again.
+        --i;
+        break;
+      case PDF_PAGENODE_PAGE:
+        iCount++;
+        if (iPage == iCount && m_pDocument)
+          m_pDocument->m_PageList.SetAt(iPage, pNode->m_dwPageNo);
+        break;
+      case PDF_PAGENODE_PAGES:
+        if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1))
+          return FALSE;
+        break;
+      case PDF_PAGENODE_ARRAY:
+        if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints))
+          return FALSE;
+        // The array has been expanded into children; visit this node again.
+        --i;
+        break;
+    }
+
+    if (iPage == iCount) {
+      m_docStatus = PDF_DATAAVAIL_DONE;
+      return TRUE;
+    }
+  }
+  return TRUE;
+}
+
+// Locates page |iPage| in the page tree rooted at |m_pageNodes|.
+// Finishes immediately (PDF_DATAAVAIL_DONE) if |iPage| is not below the
+// document's page count or the page list already has an entry for it.
+// If the root itself is a single page, only page 0 is valid; any other index
+// sets PDF_DATAAVAIL_ERROR (and still returns TRUE).
+FX_BOOL CPDF_DataAvail::LoadDocPage(int32_t iPage,
+                                    IPDF_DataAvail::DownloadHints* pHints) {
+  if (m_pDocument->GetPageCount() <= iPage ||
+      m_pDocument->m_PageList.GetAt(iPage)) {
+    m_docStatus = PDF_DATAAVAIL_DONE;
+    return TRUE;
+  }
+
+  if (m_pageNodes.m_type == PDF_PAGENODE_PAGE) {
+    if (iPage == 0) {
+      m_docStatus = PDF_DATAAVAIL_DONE;
+      return TRUE;
+    }
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+    return TRUE;
+  }
+  // Starts at -1 so that the first leaf page found gets index 0.
+  int32_t iCount = -1;
+  return CheckPageNode(m_pageNodes, iPage, iCount, pHints, 0);
+}
+
+// Checks the root page-tree object |m_PagesObjNum|.
+// Returns TRUE if it is a dictionary that either has no /Kids entry or has a
+// /Count greater than zero. Returns FALSE if the object is not available yet,
+// if /Count is not positive, or - with |m_docStatus| set to
+// PDF_DATAAVAIL_ERROR - if the object does not exist or has no dictionary.
+// The fetched object is released before returning.
+FX_BOOL CPDF_DataAvail::CheckPageCount(IPDF_DataAvail::DownloadHints* pHints) {
+  FX_BOOL bExist = FALSE;
+  CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist);
+  if (!bExist) {
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+    return FALSE;
+  }
+
+  if (!pPages)
+    return FALSE;
+
+  CPDF_Dictionary* pPagesDict = pPages->GetDict();
+  if (!pPagesDict) {
+    pPages->Release();
+    m_docStatus = PDF_DATAAVAIL_ERROR;
+    return FALSE;
+  }
+
+  if (!pPagesDict->KeyExist("Kids")) {
+    pPages->Release();
+    return TRUE;
+  }
+
+  int count = pPagesDict->GetIntegerBy("Count");
+  if (count > 0) {
+    pPages->Release();
+    return TRUE;
+  }
+
+  pPages->Release();
+  return FALSE;
+}
+
+// Loads the root of the page tree into |m_pageNodes|. If the root passes
+// CheckPageCount(), advances |m_docStatus| to PDF_DATAAVAIL_PAGE; otherwise
+// sets |m_bTotalLoadPageTree| and returns FALSE.
+FX_BOOL CPDF_DataAvail::LoadDocPages(IPDF_DataAvail::DownloadHints* pHints) {
+  if (!CheckUnkownPageNode(m_PagesObjNum, &m_pageNodes, pHints))
+    return FALSE;
+
+  if (CheckPageCount(pHints)) {
+    m_docStatus = PDF_DATAAVAIL_PAGE;
+    return TRUE;
+  }
+
+  m_bTotalLoadPageTree = TRUE;
+
return FALSE;
+}
+
+// Runs CheckPageStatus() until the whole page tree is marked as loaded.
+// Returns TRUE once |m_bPagesLoad| is set; otherwise asks the document to
+// load its pages and returns FALSE so the caller checks again.
+FX_BOOL CPDF_DataAvail::LoadPages(IPDF_DataAvail::DownloadHints* pHints) {
+  while (!m_bPagesTreeLoad) {
+    if (!CheckPageStatus(pHints))
+      return FALSE;
+  }
+
+  if (m_bPagesLoad)
+    return TRUE;
+
+  m_pDocument->LoadPages();
+  return FALSE;
+}
+
+// Ensures the main cross-reference table of a linearized file is loaded.
+// On the first attempt it requires the bytes from |m_dwLastXRefOffset| to the
+// end of the file (requesting them through |pHints| if missing), asks the
+// parser to load the table and then calls PreparePageItem().
+// Returns DataError if the size computation overflows or the table fails to
+// load. The load is attempted only once: later calls simply report
+// |m_bLinearedDataOK|.
+IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData(
+    IPDF_DataAvail::DownloadHints* pHints) {
+  if (m_bLinearedDataOK)
+    return DataAvailable;
+
+  if (!m_bMainXRefLoadTried) {
+    FX_SAFE_DWORD data_size = m_dwFileLen;
+    data_size -= m_dwLastXRefOffset;
+    if (!data_size.IsValid())
+      return DataError;
+
+    if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset,
+                                   data_size.ValueOrDie())) {
+      pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie());
+      return DataNotAvailable;
+    }
+
+    CPDF_Parser::Error eRet =
+        m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
+    m_bMainXRefLoadTried = TRUE;
+    if (eRet != CPDF_Parser::SUCCESS)
+      return DataError;
+
+    if (!PreparePageItem())
+      return DataNotAvailable;
+
+    m_bMainXRefLoadedOK = TRUE;
+    m_bLinearedDataOK = TRUE;
+  }
+
+  return m_bLinearedDataOK ?
DataAvailable : DataNotAvailable;
+}
+
+// Checks that everything reachable from the /Annots entry of page |iPage| is
+// available. The check is resumable: objects that are still missing are kept
+// in |m_objs_array| and re-examined on the next call.
+// Returns TRUE when the page has no dictionary or no /Annots, or when all
+// objects are available.
+FX_BOOL CPDF_DataAvail::CheckPageAnnots(int32_t iPage,
+                                        IPDF_DataAvail::DownloadHints* pHints) {
+  if (!m_objs_array.GetSize()) {
+    // Nothing pending: start a fresh traversal from the page's /Annots.
+    m_objs_array.RemoveAll();
+    m_ObjectSet.clear();
+
+    CPDF_Dictionary* pPageDict = m_pDocument->GetPage(iPage);
+    if (!pPageDict)
+      return TRUE;
+
+    CPDF_Object* pAnnots = pPageDict->GetElement("Annots");
+    if (!pAnnots)
+      return TRUE;
+
+    CFX_ArrayTemplate<CPDF_Object*> obj_array;
+    obj_array.Add(pAnnots);
+
+    FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
+    if (bRet)
+      m_objs_array.RemoveAll();
+
+    return bRet;
+  }
+
+  // Resume with the objects left over from the previous call.
+  CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
+  FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
+  m_objs_array.RemoveAll();
+  if (!bRet)
+    m_objs_array.Append(new_objs_array);
+
+  return bRet;
+}
+
+// Availability check for the first page of a linearized file: the page's
+// annotations first, then the linearized main cross-reference data.
+IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage(
+    int32_t iPage,
+    IPDF_DataAvail::DownloadHints* pHints) {
+  if (!m_bAnnotsLoad) {
+    if (!CheckPageAnnots(iPage, pHints))
+      return DataNotAvailable;
+    m_bAnnotsLoad = TRUE;
+  }
+
+  DocAvailStatus nRet = CheckLinearizedData(pHints);
+  if (nRet == DataAvailable)
+    m_bPageLoadedOK = FALSE;
+  return nRet;
+}
+
+// Walks up the /Parent chain of |pDict| looking for the nearest ancestor that
+// has a /Resources entry. On success stores that entry in |m_pPageResource|
+// and returns TRUE. The recursion depth is tracked in a static counter
+// (restored on exit by CFX_AutoRestorer) and capped at
+// kMaxDataAvailRecursionDepth.
+FX_BOOL CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) {
+  CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth);
+  if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth)
+    return FALSE;
+
+  CPDF_Object* pParent = pDict->GetElement("Parent");
+  if (!pParent)
+    return FALSE;
+
+  CPDF_Dictionary* pParentDict = pParent->GetDict();
+  if (!pParentDict)
+    return FALSE;
+
+  CPDF_Object* pRet = pParentDict->GetElement("Resources");
+  if (pRet) {
+    m_pPageResource = pRet;
+    return TRUE;
+  }
+
+  return HaveResourceAncestor(pParentDict);
+}
+
+// Reports whether page |iPage| can be loaded with the data downloaded so far.
+// The check is resumable: per-page progress is kept in member flags
+// (|m_bCurPageDictLoadOK|, |m_bPageLoadedOK|, |m_bAnnotsLoad|,
+// |m_bNeedDownLoadResource|) and in |m_objs_array|, and missing ranges are
+// reported through |pHints|. Pages that were fully checked are remembered in
+// |m_pagesLoadState|.
+// Steps: page-tree / linearization data, AcroForm sub-objects, the page
+// dictionary and what it references, annotations, then resources (own or
+// inherited).
+// Returns DataError if no document has been set.
+IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
+    int32_t iPage,
+    IPDF_DataAvail::DownloadHints* pHints) {
+  if (!m_pDocument)
+    return DataError;
+
+  if
(IsFirstCheck(iPage)) {
+    // First request for this page: reset the per-page progress state.
+    m_bCurPageDictLoadOK = FALSE;
+    m_bPageLoadedOK = FALSE;
+    m_bAnnotsLoad = FALSE;
+    m_bNeedDownLoadResource = FALSE;
+    m_objs_array.RemoveAll();
+    m_ObjectSet.clear();
+  }
+
+  if (pdfium::ContainsKey(m_pagesLoadState, iPage))
+    return DataAvailable;
+
+  if (m_bLinearized) {
+    if ((FX_DWORD)iPage == m_dwFirstPageNo) {
+      DocAvailStatus nRet = CheckLinearizedFirstPage(iPage, pHints);
+      if (nRet == DataAvailable)
+        m_pagesLoadState.insert(iPage);
+      return nRet;
+    }
+
+    DocAvailStatus nResult = CheckLinearizedData(pHints);
+    if (nResult != DataAvailable)
+      return nResult;
+
+    // With hint tables the page check is delegated to them entirely.
+    if (m_pHintTables) {
+      nResult = m_pHintTables->CheckPage(iPage, pHints);
+      if (nResult != DataAvailable)
+        return nResult;
+      m_pagesLoadState.insert(iPage);
+      return DataAvailable;
+    }
+
+    if (m_bMainXRefLoadedOK) {
+      if (m_bTotalLoadPageTree) {
+        if (!LoadPages(pHints))
+          return DataNotAvailable;
+      } else {
+        if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints))
+          return DataNotAvailable;
+      }
+    } else {
+      // Main cross-reference table unusable: fall back to the whole file and
+      // rebuild the cross-reference data from it.
+      if (!LoadAllFile(pHints))
+        return DataNotAvailable;
+      m_pDocument->GetParser()->RebuildCrossRef();
+      ResetFirstCheck(iPage);
+      return DataAvailable;
+    }
+  } else {
+    if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK &&
+        !CheckPage(iPage, pHints)) {
+      return DataNotAvailable;
+    }
+  }
+
+  if (m_bHaveAcroForm && !m_bAcroFormLoad) {
+    if (!CheckAcroFormSubObject(pHints))
+      return DataNotAvailable;
+    m_bAcroFormLoad = TRUE;
+  }
+
+  if (!m_bPageLoadedOK) {
+    if (!m_objs_array.GetSize()) {
+      // Nothing pending: start from the page dictionary itself.
+      m_objs_array.RemoveAll();
+      m_ObjectSet.clear();
+
+      m_pPageDict = m_pDocument->GetPage(iPage);
+      if (!m_pPageDict) {
+        ResetFirstCheck(iPage);
+        return DataAvailable;
+      }
+
+      CFX_ArrayTemplate<CPDF_Object*> obj_array;
+      obj_array.Add(m_pPageDict);
+      FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
+      if (!bRet)
+        return DataNotAvailable;
+
+      m_objs_array.RemoveAll();
+    } else {
+      // Resume with the objects left over from the previous call.
+      CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
+      FX_BOOL bRet =
+
IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
+
+      m_objs_array.RemoveAll();
+      if (!bRet) {
+        m_objs_array.Append(new_objs_array);
+        return DataNotAvailable;
+      }
+    }
+    m_bPageLoadedOK = TRUE;
+  }
+
+  if (!m_bAnnotsLoad) {
+    if (!CheckPageAnnots(iPage, pHints))
+      return DataNotAvailable;
+    m_bAnnotsLoad = TRUE;
+  }
+
+  // Use the page's own /Resources, or else the nearest ancestor's.
+  if (m_pPageDict && !m_bNeedDownLoadResource) {
+    m_pPageResource = m_pPageDict->GetElement("Resources");
+    if (!m_pPageResource)
+      m_bNeedDownLoadResource = HaveResourceAncestor(m_pPageDict);
+    else
+      m_bNeedDownLoadResource = TRUE;
+  }
+
+  if (m_bNeedDownLoadResource) {
+    FX_BOOL bRet = CheckResources(pHints);
+    if (!bRet)
+      return DataNotAvailable;
+    m_bNeedDownLoadResource = FALSE;
+  }
+
+  // Page complete: clear the progress flags for the next page.
+  m_bPageLoadedOK = FALSE;
+  m_bAnnotsLoad = FALSE;
+  m_bCurPageDictLoadOK = FALSE;
+
+  ResetFirstCheck(iPage);
+  m_pagesLoadState.insert(iPage);
+  return DataAvailable;
+}
+
+// Checks that everything reachable from |m_pPageResource| is available.
+// Resumable in the same way as CheckPageAnnots(): objects still missing are
+// kept in |m_objs_array| for the next call.
+FX_BOOL CPDF_DataAvail::CheckResources(IPDF_DataAvail::DownloadHints* pHints) {
+  if (!m_objs_array.GetSize()) {
+    m_objs_array.RemoveAll();
+    CFX_ArrayTemplate<CPDF_Object*> obj_array;
+    obj_array.Add(m_pPageResource);
+
+    FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
+    if (bRet)
+      m_objs_array.RemoveAll();
+    return bRet;
+  }
+
+  CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
+  FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
+  m_objs_array.RemoveAll();
+  if (!bRet)
+    m_objs_array.Append(new_objs_array);
+  return bRet;
+}
+
+// Reports where the main cross-reference data of a linearized file lives:
+// |*pPos| receives |m_dwLastXRefOffset| and |*pSize| the number of bytes from
+// there to the end of the file. Either pointer may be null.
+void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos,
+                                               FX_DWORD* pSize) {
+  if (pPos)
+    *pPos = m_dwLastXRefOffset;
+  if (pSize)
+    *pSize = (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset);
+}
+
+// Returns the page count: the /N value of the linearization dictionary when
+// one was parsed (0 if it is missing), otherwise the document's page count,
+// or 0 without a document.
+int CPDF_DataAvail::GetPageCount() const {
+  if (m_pLinearized) {
+    CPDF_Dictionary* pDict = m_pLinearized->GetDict();
+    CPDF_Object* pObj = pDict ? pDict->GetElementValue("N") : nullptr;
+    return pObj ? pObj->GetInteger() : 0;
+  }
+  return m_pDocument ?
m_pDocument->GetPageCount() : 0;
+}
+
+// Returns the dictionary of page |index|, or nullptr if there is no document
+// or |index| is out of range.
+// For a linearized file with hint tables, any page other than the one named
+// by /P in the linearization dictionary is located through the hint tables,
+// parsed directly from the file and inserted into the document as an indirect
+// object. All other cases are delegated to the document.
+CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
+  if (!m_pDocument || index < 0 || index >= GetPageCount())
+    return nullptr;
+
+  if (m_pLinearized) {
+    CPDF_Dictionary* pDict = m_pLinearized->GetDict();
+    CPDF_Object* pObj = pDict ? pDict->GetElementValue("P") : nullptr;
+
+    int pageNum = pObj ? pObj->GetInteger() : 0;
+    if (m_pHintTables && index != pageNum) {
+      FX_FILESIZE szPageStartPos = 0;
+      FX_FILESIZE szPageLength = 0;
+      FX_DWORD dwObjNum = 0;
+      FX_BOOL bPagePosGot = m_pHintTables->GetPagePos(index, szPageStartPos,
+                                                      szPageLength, dwObjNum);
+      if (!bPagePosGot)
+        return nullptr;
+
+      // The parser is re-based at the page's start, so the object is at 0.
+      m_syntaxParser.InitParser(m_pFileRead, (FX_DWORD)szPageStartPos);
+      CPDF_Object* pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument);
+      if (!pPageDict)
+        return nullptr;
+
+      // NOTE(review): presumably InsertIndirectObject() takes ownership of
+      // |pPageDict| even when it fails - confirm, otherwise this path leaks.
+      if (!m_pDocument->InsertIndirectObject(dwObjNum, pPageDict))
+        return nullptr;
+      return pPageDict->GetDict();
+    }
+  }
+  return m_pDocument->GetPage(index);
+}
+
+// Reports whether the document's interactive form data is available.
+// Returns FormAvailable when there is no document or no root dictionary,
+// FormNotExist when the root has no /AcroForm, FormError / FormNotAvailable
+// according to the linearized-data check, and otherwise checks everything
+// reachable from the /AcroForm dictionary. The object check is resumable:
+// objects still missing stay in |m_objs_array| for the next call.
+IPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
+    IPDF_DataAvail::DownloadHints* pHints) {
+  if (!m_pDocument)
+    return FormAvailable;
+
+  if (!m_bLinearizedFormParamLoad) {
+    CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
+    if (!pRoot)
+      return FormAvailable;
+
+    CPDF_Object* pAcroForm = pRoot->GetElement("AcroForm");
+    if (!pAcroForm)
+      return FormNotExist;
+
+    DocAvailStatus nDocStatus = CheckLinearizedData(pHints);
+    if (nDocStatus == DataError)
+      return FormError;
+    if (nDocStatus == DataNotAvailable)
+      return FormNotAvailable;
+
+    if (!m_objs_array.GetSize())
+      m_objs_array.Add(pAcroForm->GetDict());
+    m_bLinearizedFormParamLoad = TRUE;
+  }
+
+  CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
+  FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
+  m_objs_array.RemoveAll();
+  if (!bRet) {
+    m_objs_array.Append(new_objs_array);
+    return FormNotAvailable;
+  }
+  return FormAvailable;
+}
+
+// Creates a node of unknown type. |m_dwPageNo| is zero-initialized so that a
+// node never carries an indeterminate object number before it is assigned.
+CPDF_DataAvail::PageNode::PageNode()
+    : m_type(PDF_PAGENODE_UNKNOWN), m_dwPageNo(0) {}
+
+// Deletes all child nodes; each child deletes its own children in turn.
+CPDF_DataAvail::PageNode::~PageNode() {
+  for (int32_t i = 0; i < m_childNode.GetSize(); ++i)
+    delete m_childNode[i];
+  m_childNode.RemoveAll();
+}
diff --git a/core/fpdfapi/fpdf_parser/cpdf_data_avail.h b/core/fpdfapi/fpdf_parser/cpdf_data_avail.h
new file mode 100644
index 0000000000..2a6995914c
--- /dev/null
+++ b/core/fpdfapi/fpdf_parser/cpdf_data_avail.h
@@ -0,0 +1,217 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FPDFAPI_FPDF_PARSER_CPDF_DATA_AVAIL_H_
+#define CORE_FPDFAPI_FPDF_PARSER_CPDF_DATA_AVAIL_H_
+
+#include "core/fpdfapi/fpdf_parser/cpdf_hint_tables.h"
+#include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
+#include "core/include/fpdfapi/cpdf_parser.h"
+#include "core/include/fpdfapi/ipdf_data_avail.h"
+#include "core/include/fxcrt/fx_basic.h"
+
+class CPDF_Dictionary;
+class CPDF_IndirectObjectHolder;
+class CPDF_Parser;
+
+// States of the availability state machine; the current state is kept in
+// CPDF_DataAvail::m_docStatus.
+enum PDF_DATAAVAIL_STATUS {
+  PDF_DATAAVAIL_HEADER = 0,
+  PDF_DATAAVAIL_FIRSTPAGE,
+  PDF_DATAAVAIL_FIRSTPAGE_PREPARE,
+  PDF_DATAAVAIL_HINTTABLE,
+  PDF_DATAAVAIL_END,
+  PDF_DATAAVAIL_CROSSREF,
+  PDF_DATAAVAIL_CROSSREF_ITEM,
+  PDF_DATAAVAIL_CROSSREF_STREAM,
+  PDF_DATAAVAIL_TRAILER,
+  PDF_DATAAVAIL_LOADALLCROSSREF,
+  PDF_DATAAVAIL_ROOT,
+  PDF_DATAAVAIL_INFO,
+  PDF_DATAAVAIL_ACROFORM,
+  PDF_DATAAVAIL_ACROFORM_SUBOBJECT,
+  PDF_DATAAVAIL_PAGETREE,
+  PDF_DATAAVAIL_PAGE,
+  PDF_DATAAVAIL_PAGE_LATERLOAD,
+  PDF_DATAAVAIL_RESOURCES,
+  PDF_DATAAVAIL_DONE,
+  PDF_DATAAVAIL_ERROR,
+  PDF_DATAAVAIL_LOADALLFILE,
+  PDF_DATAAVAIL_TRAILER_APPEND
+};
+
+// Kind of a node in the page tree as discovered so far (PageNode::m_type).
+// UNKNOWN nodes have not been inspected yet; ARRAY nodes are objects that are
+// an array of kids and still need to be expanded.
+enum PDF_PAGENODE_TYPE {
+  PDF_PAGENODE_UNKNOWN = 0,
+  PDF_PAGENODE_PAGE,
+  PDF_PAGENODE_PAGES,
+  PDF_PAGENODE_ARRAY,
+};
+
+// Implementation of IPDF_DataAvail. It answers "is enough of the file
+// downloaded?" for the document, individual pages and the form, and reports
+// missing byte ranges through DownloadHints. Progress is kept in member state
+// so every check can be repeated as more data arrives.
+class CPDF_DataAvail final : public IPDF_DataAvail {
+ public:
+  CPDF_DataAvail(FileAvail* pFileAvail,
+                 IFX_FileRead* pFileRead,
+                 FX_BOOL
bSupportHintTable);
+  ~CPDF_DataAvail() override;
+
+  // IPDF_DataAvail:
+  DocAvailStatus IsDocAvail(DownloadHints* pHints) override;
+  void SetDocument(CPDF_Document* pDoc) override;
+  DocAvailStatus IsPageAvail(int iPage, DownloadHints* pHints) override;
+  DocFormStatus IsFormAvail(DownloadHints* pHints) override;
+  DocLinearizationStatus IsLinearizedPDF() override;
+  FX_BOOL IsLinearized() override { return m_bLinearized; }
+  void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, FX_DWORD* pSize) override;
+
+  // Page access that is aware of linearization and hint tables.
+  int GetPageCount() const;
+  CPDF_Dictionary* GetPage(int index);
+
+  friend class CPDF_HintTables;
+
+ protected:
+  // One node of the page tree as reconstructed during availability checks.
+  // A node owns its children and deletes them in its destructor.
+  class PageNode {
+   public:
+    PageNode();
+    ~PageNode();
+
+    PDF_PAGENODE_TYPE m_type;
+    // Object number of the page-tree object this node stands for.
+    FX_DWORD m_dwPageNo;
+    CFX_ArrayTemplate<PageNode*> m_childNode;
+  };
+
+  // Depth limits for HaveResourceAncestor() and CheckPageNode() respectively.
+  static const int kMaxDataAvailRecursionDepth = 64;
+  static int s_CurrentDataAvailRecursionDepth;
+  static const int kMaxPageRecursionDepth = 1024;
+
+  FX_DWORD GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset);
+  FX_BOOL IsObjectsAvail(CFX_ArrayTemplate<CPDF_Object*>& obj_array,
+                         FX_BOOL bParsePage,
+                         IPDF_DataAvail::DownloadHints* pHints,
+                         CFX_ArrayTemplate<CPDF_Object*>& ret_array);
+  // Steps of the document-level state machine. Each returns FALSE when it
+  // cannot make progress (data missing or error) and updates |m_docStatus|.
+  FX_BOOL CheckDocStatus(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckHeader(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckFirstPage(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckHintTables(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckEnd(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckCrossRef(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckCrossRefItem(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckTrailer(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckRoot(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckInfo(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckPages(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckPage(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckResources(IPDF_DataAvail::DownloadHints*
pHints);
+  FX_BOOL CheckAnnots(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckAcroForm(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckAcroFormSubObject(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckTrailerAppend(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckPageStatus(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckAllCrossRefStream(IPDF_DataAvail::DownloadHints* pHints);
+
+  // Returns 1 on success, 0 if more data is needed, -1 on error.
+  int32_t CheckCrossRefStream(IPDF_DataAvail::DownloadHints* pHints,
+                              FX_FILESIZE& xref_offset);
+  FX_BOOL IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen);
+  // Buffered character/token reader positioned by |m_Pos|.
+  void SetStartOffset(FX_FILESIZE dwOffset);
+  FX_BOOL GetNextToken(CFX_ByteString& token);
+  FX_BOOL GetNextChar(uint8_t& ch);
+  CPDF_Object* ParseIndirectObjectAt(
+      FX_FILESIZE pos,
+      FX_DWORD objnum,
+      CPDF_IndirectObjectHolder* pObjList = nullptr);
+  CPDF_Object* GetObject(FX_DWORD objnum,
+                         IPDF_DataAvail::DownloadHints* pHints,
+                         FX_BOOL* pExistInFile);
+  FX_BOOL GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages);
+  FX_BOOL PreparePageItem();
+  FX_BOOL LoadPages(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL LoadAllXref(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL LoadAllFile(IPDF_DataAvail::DownloadHints* pHints);
+  DocAvailStatus CheckLinearizedData(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckPageAnnots(int iPage, IPDF_DataAvail::DownloadHints* pHints);
+
+  DocAvailStatus CheckLinearizedFirstPage(
+      int iPage,
+      IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL HaveResourceAncestor(CPDF_Dictionary* pDict);
+  // Page-tree discovery for a specific page.
+  FX_BOOL CheckPage(int32_t iPage, IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL LoadDocPages(IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL LoadDocPage(int32_t iPage, IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckPageNode(PageNode& pageNodes,
+                        int32_t iPage,
+                        int32_t& iCount,
+                        IPDF_DataAvail::DownloadHints* pHints,
+                        int level);
+  FX_BOOL CheckUnkownPageNode(FX_DWORD dwPageNo,
+                              PageNode* pPageNode,
+                              IPDF_DataAvail::DownloadHints* pHints);
+
FX_BOOL CheckArrayPageNode(FX_DWORD dwPageNo,
+                             PageNode* pPageNode,
+                             IPDF_DataAvail::DownloadHints* pHints);
+  FX_BOOL CheckPageCount(IPDF_DataAvail::DownloadHints* pHints);
+  bool IsFirstCheck(int iPage);
+  void ResetFirstCheck(int iPage);
+  FX_BOOL IsDataAvail(FX_FILESIZE offset,
+                      FX_DWORD size,
+                      IPDF_DataAvail::DownloadHints* pHints);
+
+  CPDF_Parser m_parser;
+  CPDF_SyntaxParser m_syntaxParser;
+  CPDF_Object* m_pRoot;
+  FX_DWORD m_dwRootObjNum;
+  FX_DWORD m_dwInfoObjNum;
+  // Linearization parameter object parsed by IsLinearizedFile(), if any.
+  CPDF_Object* m_pLinearized;
+  CPDF_Object* m_pTrailer;
+  FX_BOOL m_bDocAvail;
+  FX_FILESIZE m_dwHeaderOffset;
+  FX_FILESIZE m_dwLastXRefOffset;
+  FX_FILESIZE m_dwXRefOffset;
+  FX_FILESIZE m_dwTrailerOffset;
+  FX_FILESIZE m_dwCurrentOffset;
+  // Current state of the availability state machine.
+  PDF_DATAAVAIL_STATUS m_docStatus;
+  FX_FILESIZE m_dwFileLen;
+  CPDF_Document* m_pDocument;
+  std::set<FX_DWORD> m_ObjectSet;
+  // Objects still pending from an interrupted IsObjectsAvail() traversal.
+  CFX_ArrayTemplate<CPDF_Object*> m_objs_array;
+  // Read position and 512-byte window used by GetNextChar().
+  FX_FILESIZE m_Pos;
+  FX_FILESIZE m_bufferOffset;
+  FX_DWORD m_bufferSize;
+  CFX_ByteString m_WordBuf;
+  uint8_t m_bufferData[512];
+  CFX_DWordArray m_XRefStreamList;
+  CFX_DWordArray m_PageObjList;
+  FX_DWORD m_PagesObjNum;
+  FX_BOOL m_bLinearized;
+  FX_DWORD m_dwFirstPageNo;
+  FX_BOOL m_bLinearedDataOK;
+  FX_BOOL m_bMainXRefLoadTried;
+  FX_BOOL m_bMainXRefLoadedOK;
+  FX_BOOL m_bPagesTreeLoad;
+  FX_BOOL m_bPagesLoad;
+  CPDF_Parser* m_pCurrentParser;
+  FX_FILESIZE m_dwCurrentXRefSteam;
+  FX_BOOL m_bAnnotsLoad;
+  FX_BOOL m_bHaveAcroForm;
+  FX_DWORD m_dwAcroFormObjNum;
+  FX_BOOL m_bAcroFormLoad;
+  CPDF_Object* m_pAcroForm;
+  CFX_ArrayTemplate<CPDF_Object*> m_arrayAcroforms;
+  CPDF_Dictionary* m_pPageDict;
+  CPDF_Object* m_pPageResource;
+  FX_BOOL m_bNeedDownLoadResource;
+  FX_BOOL m_bPageLoadedOK;
+  FX_BOOL m_bLinearizedFormParamLoad;
+  CFX_ArrayTemplate<CPDF_Object*> m_PagesArray;
+  FX_DWORD m_dwEncryptObjNum;
+  FX_FILESIZE m_dwPrevXRefOffset;
+  FX_BOOL m_bTotalLoadPageTree;
+  FX_BOOL m_bCurPageDictLoadOK;
+  // Root of the reconstructed page tree.
+  PageNode m_pageNodes;
+  std::set<FX_DWORD> m_pageMapCheckState;
+
std::set<FX_DWORD> m_pagesLoadState;
+  std::unique_ptr<CPDF_HintTables> m_pHintTables;
+  FX_BOOL m_bSupportHintTable;
+};
+
+#endif  // CORE_FPDFAPI_FPDF_PARSER_CPDF_DATA_AVAIL_H_
diff --git a/core/fpdfapi/fpdf_parser/cpdf_dictionary.cpp b/core/fpdfapi/fpdf_parser/cpdf_dictionary.cpp
new file mode 100644
index 0000000000..864cd59744
--- /dev/null
+++ b/core/fpdfapi/fpdf_parser/cpdf_dictionary.cpp
@@ -0,0 +1,271 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/include/fpdfapi/cpdf_dictionary.h"
+
+#include "core/include/fpdfapi/cpdf_array.h"
+#include "core/include/fpdfapi/cpdf_boolean.h"
+#include "core/include/fpdfapi/cpdf_name.h"
+#include "core/include/fpdfapi/cpdf_number.h"
+#include "core/include/fpdfapi/cpdf_reference.h"
+#include "core/include/fpdfapi/cpdf_stream.h"
+#include "core/include/fpdfapi/cpdf_string.h"
+#include "third_party/base/stl_util.h"
+
+CPDF_Dictionary::CPDF_Dictionary() {}
+
+// Releases every value stored in the dictionary.
+CPDF_Dictionary::~CPDF_Dictionary() {
+  for (const auto& it : m_Map)
+    it.second->Release();
+}
+
+CPDF_Object::Type CPDF_Dictionary::GetType() const {
+  return DICTIONARY;
+}
+
+CPDF_Dictionary* CPDF_Dictionary::GetDict() const {
+  // The method should be made non-const if we want to not be const.
+  // See bug #234.
+
+  return const_cast<CPDF_Dictionary*>(this);
+}
+
+bool CPDF_Dictionary::IsDictionary() const {
+  return true;
+}
+
+CPDF_Dictionary* CPDF_Dictionary::AsDictionary() {
+  return this;
+}
+
+const CPDF_Dictionary* CPDF_Dictionary::AsDictionary() const {
+  return this;
+}
+
+// Returns a new dictionary holding a clone of every entry; |bDirect| is
+// passed through to each value's Clone().
+CPDF_Object* CPDF_Dictionary::Clone(FX_BOOL bDirect) const {
+  CPDF_Dictionary* pCopy = new CPDF_Dictionary();
+  for (const auto& it : *this)
+    pCopy->m_Map.insert(std::make_pair(it.first, it.second->Clone(bDirect)));
+  return pCopy;
+}
+
+// Returns the value stored under |key| exactly as stored, or nullptr if the
+// key is absent.
+CPDF_Object* CPDF_Dictionary::GetElement(const CFX_ByteStringC& key) const {
+  auto it = m_Map.find(key);
+  if (it == m_Map.end())
+    return nullptr;
+  return it->second;
+}
+// Like GetElement(), but returns the value's GetDirect() result; nullptr if
+// the key is absent.
+CPDF_Object* CPDF_Dictionary::GetElementValue(
+    const CFX_ByteStringC& key) const {
+  CPDF_Object* p = GetElement(key);
+  return p ? p->GetDirect() : nullptr;
+}
+
+// Returns the value under |key| as a byte string, or an empty string if the
+// key is absent.
+CFX_ByteString CPDF_Dictionary::GetStringBy(const CFX_ByteStringC& key) const {
+  CPDF_Object* p = GetElement(key);
+  return p ? p->GetString() : CFX_ByteString();
+}
+
+// Same as GetStringBy(), but returns a CFX_ByteStringC obtained from the
+// value's GetConstString().
+CFX_ByteStringC CPDF_Dictionary::GetConstStringBy(
+    const CFX_ByteStringC& key) const {
+  CPDF_Object* p = GetElement(key);
+  return p ? p->GetConstString() : CFX_ByteStringC();
+}
+
+// Returns the value under |key| as wide text, following a reference to its
+// target first; empty if the key is absent or the reference does not resolve.
+CFX_WideString CPDF_Dictionary::GetUnicodeTextBy(
+    const CFX_ByteStringC& key) const {
+  CPDF_Object* p = GetElement(key);
+  if (CPDF_Reference* pRef = ToReference(p))
+    p = pRef->GetDirect();
+  return p ? p->GetUnicodeText() : CFX_WideString();
+}
+
+// Overload that returns |def| instead of an empty string when the key is
+// absent.
+CFX_ByteString CPDF_Dictionary::GetStringBy(const CFX_ByteStringC& key,
+                                            const CFX_ByteStringC& def) const {
+  CPDF_Object* p = GetElement(key);
+  return p ? p->GetString() : CFX_ByteString(def);
+}
+
+CFX_ByteStringC CPDF_Dictionary::GetConstStringBy(
+    const CFX_ByteStringC& key,
+    const CFX_ByteStringC& def) const {
+  CPDF_Object* p = GetElement(key);
+  return p ?
p->GetConstString() : CFX_ByteStringC(def); +} + +int CPDF_Dictionary::GetIntegerBy(const CFX_ByteStringC& key) const { + CPDF_Object* p = GetElement(key); + return p ? p->GetInteger() : 0; +} + +int CPDF_Dictionary::GetIntegerBy(const CFX_ByteStringC& key, int def) const { + CPDF_Object* p = GetElement(key); + return p ? p->GetInteger() : def; +} + +FX_FLOAT CPDF_Dictionary::GetNumberBy(const CFX_ByteStringC& key) const { + CPDF_Object* p = GetElement(key); + return p ? p->GetNumber() : 0; +} + +FX_BOOL CPDF_Dictionary::GetBooleanBy(const CFX_ByteStringC& key, + FX_BOOL bDefault) const { + CPDF_Object* p = GetElement(key); + return ToBoolean(p) ? p->GetInteger() : bDefault; +} + +CPDF_Dictionary* CPDF_Dictionary::GetDictBy(const CFX_ByteStringC& key) const { + CPDF_Object* p = GetElementValue(key); + if (!p) + return nullptr; + if (CPDF_Dictionary* pDict = p->AsDictionary()) + return pDict; + if (CPDF_Stream* pStream = p->AsStream()) + return pStream->GetDict(); + return nullptr; +} + +CPDF_Array* CPDF_Dictionary::GetArrayBy(const CFX_ByteStringC& key) const { + return ToArray(GetElementValue(key)); +} + +CPDF_Stream* CPDF_Dictionary::GetStreamBy(const CFX_ByteStringC& key) const { + return ToStream(GetElementValue(key)); +} + +CFX_FloatRect CPDF_Dictionary::GetRectBy(const CFX_ByteStringC& key) const { + CFX_FloatRect rect; + CPDF_Array* pArray = GetArrayBy(key); + if (pArray) + rect = pArray->GetRect(); + return rect; +} + +CFX_Matrix CPDF_Dictionary::GetMatrixBy(const CFX_ByteStringC& key) const { + CFX_Matrix matrix; + CPDF_Array* pArray = GetArrayBy(key); + if (pArray) + matrix = pArray->GetMatrix(); + return matrix; +} + +FX_BOOL CPDF_Dictionary::KeyExist(const CFX_ByteStringC& key) const { + return pdfium::ContainsKey(m_Map, key); +} + +bool CPDF_Dictionary::IsSignatureDict() const { + CPDF_Object* pType = GetElementValue("Type"); + if (!pType) + pType = GetElementValue("FT"); + return pType && pType->GetString() == "Sig"; +} + +void 
CPDF_Dictionary::SetAt(const CFX_ByteStringC& key, CPDF_Object* pObj) { + ASSERT(IsDictionary()); + // Avoid 2 constructions of CFX_ByteString. + CFX_ByteString key_bytestring = key; + auto it = m_Map.find(key_bytestring); + if (it == m_Map.end()) { + if (pObj) + m_Map.insert(std::make_pair(key_bytestring, pObj)); + return; + } + + if (it->second == pObj) + return; + it->second->Release(); + + if (pObj) + it->second = pObj; + else + m_Map.erase(it); +} + +void CPDF_Dictionary::RemoveAt(const CFX_ByteStringC& key) { + auto it = m_Map.find(key); + if (it == m_Map.end()) + return; + + it->second->Release(); + m_Map.erase(it); +} + +void CPDF_Dictionary::ReplaceKey(const CFX_ByteStringC& oldkey, + const CFX_ByteStringC& newkey) { + auto old_it = m_Map.find(oldkey); + if (old_it == m_Map.end()) + return; + + // Avoid 2 constructions of CFX_ByteString. + CFX_ByteString newkey_bytestring = newkey; + auto new_it = m_Map.find(newkey_bytestring); + if (new_it == old_it) + return; + + if (new_it != m_Map.end()) { + new_it->second->Release(); + new_it->second = old_it->second; + } else { + m_Map.insert(std::make_pair(newkey_bytestring, old_it->second)); + } + m_Map.erase(old_it); +} + +void CPDF_Dictionary::SetAtInteger(const CFX_ByteStringC& key, int i) { + SetAt(key, new CPDF_Number(i)); +} + +void CPDF_Dictionary::SetAtName(const CFX_ByteStringC& key, + const CFX_ByteString& name) { + SetAt(key, new CPDF_Name(name)); +} + +void CPDF_Dictionary::SetAtString(const CFX_ByteStringC& key, + const CFX_ByteString& str) { + SetAt(key, new CPDF_String(str, FALSE)); +} + +void CPDF_Dictionary::SetAtReference(const CFX_ByteStringC& key, + CPDF_IndirectObjectHolder* pDoc, + FX_DWORD objnum) { + SetAt(key, new CPDF_Reference(pDoc, objnum)); +} + +void CPDF_Dictionary::AddReference(const CFX_ByteStringC& key, + CPDF_IndirectObjectHolder* pDoc, + FX_DWORD objnum) { + SetAt(key, new CPDF_Reference(pDoc, objnum)); +} + +void CPDF_Dictionary::SetAtNumber(const CFX_ByteStringC& key, FX_FLOAT 
f) { + CPDF_Number* pNumber = new CPDF_Number(f); + SetAt(key, pNumber); +} + +void CPDF_Dictionary::SetAtBoolean(const CFX_ByteStringC& key, FX_BOOL bValue) { + SetAt(key, new CPDF_Boolean(bValue)); +} + +void CPDF_Dictionary::SetAtRect(const CFX_ByteStringC& key, + const CFX_FloatRect& rect) { + CPDF_Array* pArray = new CPDF_Array; + pArray->AddNumber(rect.left); + pArray->AddNumber(rect.bottom); + pArray->AddNumber(rect.right); + pArray->AddNumber(rect.top); + SetAt(key, pArray); +} + +void CPDF_Dictionary::SetAtMatrix(const CFX_ByteStringC& key, + const CFX_Matrix& matrix) { + CPDF_Array* pArray = new CPDF_Array; + pArray->AddNumber(matrix.a); + pArray->AddNumber(matrix.b); + pArray->AddNumber(matrix.c); + pArray->AddNumber(matrix.d); + pArray->AddNumber(matrix.e); + pArray->AddNumber(matrix.f); + SetAt(key, pArray); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_document.cpp b/core/fpdfapi/fpdf_parser/cpdf_document.cpp new file mode 100644 index 0000000000..c6bdcddfbe --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_document.cpp @@ -0,0 +1,354 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_document.h" + +#include <set> + +#include "core/fpdfapi/fpdf_render/render_int.h" +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_parser.h" +#include "core/include/fpdfapi/cpdf_reference.h" +#include "core/include/fpdfapi/fpdf_module.h" +#include "core/include/fxge/fx_font.h" +#include "third_party/base/stl_util.h" + +namespace { + +int CountPages(CPDF_Dictionary* pPages, + std::set<CPDF_Dictionary*>* visited_pages) { + int count = pPages->GetIntegerBy("Count"); + if (count > 0 && count < FPDF_PAGE_MAX_NUM) { + return count; + } + CPDF_Array* pKidList = pPages->GetArrayBy("Kids"); + if (!pKidList) { + return 0; + } + count = 0; + for (FX_DWORD i = 0; i < pKidList->GetCount(); i++) { + CPDF_Dictionary* pKid = pKidList->GetDictAt(i); + if (!pKid || pdfium::ContainsKey(*visited_pages, pKid)) { + continue; + } + if (pKid->KeyExist("Kids")) { + // Use |visited_pages| to help detect circular references of pages. + pdfium::ScopedSetInsertion<CPDF_Dictionary*> local_add(visited_pages, + pKid); + count += CountPages(pKid, visited_pages); + } else { + // This page is a leaf node. 
+ count++; + } + } + pPages->SetAtInteger("Count", count); + return count; +} + +} // namespace + +CPDF_Document::CPDF_Document(CPDF_Parser* pParser) + : CPDF_IndirectObjectHolder(pParser) { + ASSERT(pParser); + m_pRootDict = NULL; + m_pInfoDict = NULL; + m_bLinearized = FALSE; + m_dwFirstPageNo = 0; + m_dwFirstPageObjNum = 0; + m_pDocPage = CPDF_ModuleMgr::Get()->GetPageModule()->CreateDocData(this); + m_pDocRender = CPDF_ModuleMgr::Get()->GetRenderModule()->CreateDocData(this); +} +CPDF_DocPageData* CPDF_Document::GetValidatePageData() { + if (m_pDocPage) { + return m_pDocPage; + } + m_pDocPage = CPDF_ModuleMgr::Get()->GetPageModule()->CreateDocData(this); + return m_pDocPage; +} +CPDF_DocRenderData* CPDF_Document::GetValidateRenderData() { + if (m_pDocRender) { + return m_pDocRender; + } + m_pDocRender = CPDF_ModuleMgr::Get()->GetRenderModule()->CreateDocData(this); + return m_pDocRender; +} +void CPDF_Document::LoadDoc() { + m_LastObjNum = m_pParser->GetLastObjNum(); + CPDF_Object* pRootObj = GetIndirectObject(m_pParser->GetRootObjNum()); + if (!pRootObj) { + return; + } + m_pRootDict = pRootObj->GetDict(); + if (!m_pRootDict) { + return; + } + CPDF_Object* pInfoObj = GetIndirectObject(m_pParser->GetInfoObjNum()); + if (pInfoObj) { + m_pInfoDict = pInfoObj->GetDict(); + } + CPDF_Array* pIDArray = m_pParser->GetIDArray(); + if (pIDArray) { + m_ID1 = pIDArray->GetStringAt(0); + m_ID2 = pIDArray->GetStringAt(1); + } + m_PageList.SetSize(RetrievePageCount()); +} +void CPDF_Document::LoadAsynDoc(CPDF_Dictionary* pLinearized) { + m_bLinearized = TRUE; + m_LastObjNum = m_pParser->GetLastObjNum(); + CPDF_Object* pIndirectObj = GetIndirectObject(m_pParser->GetRootObjNum()); + m_pRootDict = pIndirectObj ? pIndirectObj->GetDict() : nullptr; + if (!m_pRootDict) { + return; + } + pIndirectObj = GetIndirectObject(m_pParser->GetInfoObjNum()); + m_pInfoDict = pIndirectObj ? 
pIndirectObj->GetDict() : nullptr; + CPDF_Array* pIDArray = m_pParser->GetIDArray(); + if (pIDArray) { + m_ID1 = pIDArray->GetStringAt(0); + m_ID2 = pIDArray->GetStringAt(1); + } + FX_DWORD dwPageCount = 0; + CPDF_Object* pCount = pLinearized->GetElement("N"); + if (ToNumber(pCount)) + dwPageCount = pCount->GetInteger(); + + m_PageList.SetSize(dwPageCount); + CPDF_Object* pNo = pLinearized->GetElement("P"); + if (ToNumber(pNo)) + m_dwFirstPageNo = pNo->GetInteger(); + + CPDF_Object* pObjNum = pLinearized->GetElement("O"); + if (ToNumber(pObjNum)) + m_dwFirstPageObjNum = pObjNum->GetInteger(); +} +void CPDF_Document::LoadPages() { + m_PageList.SetSize(RetrievePageCount()); +} +CPDF_Document::~CPDF_Document() { + if (m_pDocPage) { + CPDF_ModuleMgr::Get()->GetPageModule()->ReleaseDoc(this); + CPDF_ModuleMgr::Get()->GetPageModule()->ClearStockFont(this); + } + if (m_pDocRender) { + CPDF_ModuleMgr::Get()->GetRenderModule()->DestroyDocData(m_pDocRender); + } +} +#define FX_MAX_PAGE_LEVEL 1024 +CPDF_Dictionary* CPDF_Document::_FindPDFPage(CPDF_Dictionary* pPages, + int iPage, + int nPagesToGo, + int level) { + CPDF_Array* pKidList = pPages->GetArrayBy("Kids"); + if (!pKidList) { + if (nPagesToGo == 0) { + return pPages; + } + return NULL; + } + if (level >= FX_MAX_PAGE_LEVEL) { + return NULL; + } + int nKids = pKidList->GetCount(); + for (int i = 0; i < nKids; i++) { + CPDF_Dictionary* pKid = pKidList->GetDictAt(i); + if (!pKid) { + nPagesToGo--; + continue; + } + if (pKid == pPages) { + continue; + } + if (!pKid->KeyExist("Kids")) { + if (nPagesToGo == 0) { + return pKid; + } + m_PageList.SetAt(iPage - nPagesToGo, pKid->GetObjNum()); + nPagesToGo--; + } else { + int nPages = pKid->GetIntegerBy("Count"); + if (nPagesToGo < nPages) { + return _FindPDFPage(pKid, iPage, nPagesToGo, level + 1); + } + nPagesToGo -= nPages; + } + } + return NULL; +} + +CPDF_Dictionary* CPDF_Document::GetPage(int iPage) { + if (iPage < 0 || iPage >= m_PageList.GetSize()) + return nullptr; + + if 
(m_bLinearized && (iPage == (int)m_dwFirstPageNo)) { + if (CPDF_Dictionary* pDict = + ToDictionary(GetIndirectObject(m_dwFirstPageObjNum))) { + return pDict; + } + } + + int objnum = m_PageList.GetAt(iPage); + if (objnum) { + if (CPDF_Dictionary* pDict = ToDictionary(GetIndirectObject(objnum))) + return pDict; + } + + CPDF_Dictionary* pRoot = GetRoot(); + if (!pRoot) + return nullptr; + + CPDF_Dictionary* pPages = pRoot->GetDictBy("Pages"); + if (!pPages) + return nullptr; + + CPDF_Dictionary* pPage = _FindPDFPage(pPages, iPage, iPage, 0); + if (!pPage) + return nullptr; + + m_PageList.SetAt(iPage, pPage->GetObjNum()); + return pPage; +} + +int CPDF_Document::_FindPageIndex(CPDF_Dictionary* pNode, + FX_DWORD& skip_count, + FX_DWORD objnum, + int& index, + int level) { + if (pNode->KeyExist("Kids")) { + CPDF_Array* pKidList = pNode->GetArrayBy("Kids"); + if (!pKidList) { + return -1; + } + if (level >= FX_MAX_PAGE_LEVEL) { + return -1; + } + FX_DWORD count = pNode->GetIntegerBy("Count"); + if (count <= skip_count) { + skip_count -= count; + index += count; + return -1; + } + if (count && count == pKidList->GetCount()) { + for (FX_DWORD i = 0; i < count; i++) { + if (CPDF_Reference* pKid = ToReference(pKidList->GetElement(i))) { + if (pKid->GetRefObjNum() == objnum) { + m_PageList.SetAt(index + i, objnum); + return index + i; + } + } + } + } + for (FX_DWORD i = 0; i < pKidList->GetCount(); i++) { + CPDF_Dictionary* pKid = pKidList->GetDictAt(i); + if (!pKid) { + continue; + } + if (pKid == pNode) { + continue; + } + int found_index = + _FindPageIndex(pKid, skip_count, objnum, index, level + 1); + if (found_index >= 0) { + return found_index; + } + } + } else { + if (objnum == pNode->GetObjNum()) { + return index; + } + if (skip_count) { + skip_count--; + } + index++; + } + return -1; +} +int CPDF_Document::GetPageIndex(FX_DWORD objnum) { + FX_DWORD nPages = m_PageList.GetSize(); + FX_DWORD skip_count = 0; + FX_BOOL bSkipped = FALSE; + for (FX_DWORD i = 0; i < nPages; 
i++) { + FX_DWORD objnum1 = m_PageList.GetAt(i); + if (objnum1 == objnum) { + return i; + } + if (!bSkipped && objnum1 == 0) { + skip_count = i; + bSkipped = TRUE; + } + } + CPDF_Dictionary* pRoot = GetRoot(); + if (!pRoot) { + return -1; + } + CPDF_Dictionary* pPages = pRoot->GetDictBy("Pages"); + if (!pPages) { + return -1; + } + int index = 0; + return _FindPageIndex(pPages, skip_count, objnum, index); +} +int CPDF_Document::GetPageCount() const { + return m_PageList.GetSize(); +} + +int CPDF_Document::RetrievePageCount() const { + CPDF_Dictionary* pRoot = GetRoot(); + if (!pRoot) { + return 0; + } + CPDF_Dictionary* pPages = pRoot->GetDictBy("Pages"); + if (!pPages) { + return 0; + } + if (!pPages->KeyExist("Kids")) { + return 1; + } + std::set<CPDF_Dictionary*> visited_pages; + visited_pages.insert(pPages); + return CountPages(pPages, &visited_pages); +} + +FX_DWORD CPDF_Document::GetUserPermissions(FX_BOOL bCheckRevision) const { + if (!m_pParser) { + return (FX_DWORD)-1; + } + return m_pParser->GetPermissions(bCheckRevision); +} + +FX_BOOL CPDF_Document::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) const { + auto it = m_IndirectObjs.find(objnum); + if (it != m_IndirectObjs.end()) { + CPDF_Stream* pStream = it->second->AsStream(); + bForm = pStream && pStream->GetDict()->GetStringBy("Subtype") == "Form"; + return TRUE; + } + if (!m_pParser) { + bForm = FALSE; + return TRUE; + } + return m_pParser->IsFormStream(objnum, bForm); +} + +void CPDF_Document::ClearPageData() { + if (m_pDocPage) + CPDF_ModuleMgr::Get()->GetPageModule()->ClearDoc(this); +} + +void CPDF_Document::ClearRenderData() { + if (m_pDocRender) + CPDF_ModuleMgr::Get()->GetRenderModule()->ClearDocData(m_pDocRender); +} + +void CPDF_Document::ClearRenderFont() { + if (!m_pDocRender) + return; + + CFX_FontCache* pCache = m_pDocRender->GetFontCache(); + if (pCache) + pCache->FreeCache(FALSE); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_hint_tables.cpp 
b/core/fpdfapi/fpdf_parser/cpdf_hint_tables.cpp new file mode 100644 index 0000000000..012b152498 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_hint_tables.cpp @@ -0,0 +1,469 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfapi/fpdf_parser/cpdf_hint_tables.h" + +#include "core/fpdfapi/fpdf_parser/cpdf_data_avail.h" +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_stream.h" +#include "core/include/fxcrt/fx_safe_types.h" + +namespace { + +bool CanReadFromBitStream(const CFX_BitStream* hStream, + const FX_SAFE_DWORD& num_bits) { + return num_bits.IsValid() && + hStream->BitsRemaining() >= num_bits.ValueOrDie(); +} + +} // namespace + +CPDF_HintTables::~CPDF_HintTables() { + m_dwDeltaNObjsArray.RemoveAll(); + m_dwNSharedObjsArray.RemoveAll(); + m_dwSharedObjNumArray.RemoveAll(); + m_dwIdentifierArray.RemoveAll(); +} + +FX_DWORD CPDF_HintTables::GetItemLength( + int index, + const std::vector<FX_FILESIZE>& szArray) { + if (index < 0 || szArray.size() < 2 || + static_cast<size_t>(index) > szArray.size() - 2 || + szArray[index] > szArray[index + 1]) { + return 0; + } + return szArray[index + 1] - szArray[index]; +} + +FX_BOOL CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) { + if (!hStream || hStream->IsEOF()) + return FALSE; + + int nStreamOffset = ReadPrimaryHintStreamOffset(); + int nStreamLen = ReadPrimaryHintStreamLength(); + if (nStreamOffset < 0 || nStreamLen < 1) + return FALSE; + + const FX_DWORD kHeaderSize = 288; + if (hStream->BitsRemaining() < kHeaderSize) + return FALSE; + + // Item 1: The least number of objects in a page. + FX_DWORD dwObjLeastNum = hStream->GetBits(32); + + // Item 2: The location of the first page's page object. 
+ FX_DWORD dwFirstObjLoc = hStream->GetBits(32); + if (dwFirstObjLoc > nStreamOffset) { + FX_SAFE_DWORD safeLoc = pdfium::base::checked_cast<FX_DWORD>(nStreamLen); + safeLoc += dwFirstObjLoc; + if (!safeLoc.IsValid()) + return FALSE; + m_szFirstPageObjOffset = + pdfium::base::checked_cast<FX_FILESIZE>(safeLoc.ValueOrDie()); + } else { + m_szFirstPageObjOffset = + pdfium::base::checked_cast<FX_FILESIZE>(dwFirstObjLoc); + } + + // Item 3: The number of bits needed to represent the difference + // between the greatest and least number of objects in a page. + FX_DWORD dwDeltaObjectsBits = hStream->GetBits(16); + + // Item 4: The least length of a page in bytes. + FX_DWORD dwPageLeastLen = hStream->GetBits(32); + + // Item 5: The number of bits needed to represent the difference + // between the greatest and least length of a page, in bytes. + FX_DWORD dwDeltaPageLenBits = hStream->GetBits(16); + + // Skip Item 6, 7, 8, 9 total 96 bits. + hStream->SkipBits(96); + + // Item 10: The number of bits needed to represent the greatest + // number of shared object references. + FX_DWORD dwSharedObjBits = hStream->GetBits(16); + + // Item 11: The number of bits needed to represent the numerically + // greatest shared object identifier used by the pages. + FX_DWORD dwSharedIdBits = hStream->GetBits(16); + + // Item 12: The number of bits needed to represent the numerator of + // the fractional position for each shared object reference. For each + // shared object referenced from a page, there is an indication of + // where in the page's content stream the object is first referenced. + FX_DWORD dwSharedNumeratorBits = hStream->GetBits(16); + + // Item 13: Skip Item 13 which has 16 bits. + hStream->SkipBits(16); + + CPDF_Object* pPageNum = m_pLinearizedDict->GetElementValue("N"); + int nPages = pPageNum ? 
pPageNum->GetInteger() : 0; + if (nPages < 1) + return FALSE; + + FX_SAFE_DWORD required_bits = dwDeltaObjectsBits; + required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages); + if (!CanReadFromBitStream(hStream, required_bits)) + return FALSE; + + for (int i = 0; i < nPages; ++i) { + FX_SAFE_DWORD safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits); + safeDeltaObj += dwObjLeastNum; + if (!safeDeltaObj.IsValid()) + return FALSE; + m_dwDeltaNObjsArray.Add(safeDeltaObj.ValueOrDie()); + } + hStream->ByteAlign(); + + required_bits = dwDeltaPageLenBits; + required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages); + if (!CanReadFromBitStream(hStream, required_bits)) + return FALSE; + + CFX_DWordArray dwPageLenArray; + for (int i = 0; i < nPages; ++i) { + FX_SAFE_DWORD safePageLen = hStream->GetBits(dwDeltaPageLenBits); + safePageLen += dwPageLeastLen; + if (!safePageLen.IsValid()) + return FALSE; + dwPageLenArray.Add(safePageLen.ValueOrDie()); + } + + CPDF_Object* pOffsetE = m_pLinearizedDict->GetElementValue("E"); + int nOffsetE = pOffsetE ? pOffsetE->GetInteger() : -1; + if (nOffsetE < 0) + return FALSE; + + CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetElementValue("P"); + int nFirstPageNum = pFirstPageNum ? pFirstPageNum->GetInteger() : 0; + for (int i = 0; i < nPages; ++i) { + if (i == nFirstPageNum) { + m_szPageOffsetArray.push_back(m_szFirstPageObjOffset); + } else if (i == nFirstPageNum + 1) { + if (i == 1) { + m_szPageOffsetArray.push_back(nOffsetE); + } else { + m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 2] + + dwPageLenArray[i - 2]); + } + } else { + if (i == 0) { + m_szPageOffsetArray.push_back(nOffsetE); + } else { + m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 1] + + dwPageLenArray[i - 1]); + } + } + } + + if (nPages > 0) { + m_szPageOffsetArray.push_back(m_szPageOffsetArray[nPages - 1] + + dwPageLenArray[nPages - 1]); + } + hStream->ByteAlign(); + + // Number of shared objects. 
+ required_bits = dwSharedObjBits; + required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages); + if (!CanReadFromBitStream(hStream, required_bits)) + return FALSE; + + for (int i = 0; i < nPages; i++) + m_dwNSharedObjsArray.Add(hStream->GetBits(dwSharedObjBits)); + hStream->ByteAlign(); + + // Array of identifiers, size = nshared_objects. + for (int i = 0; i < nPages; i++) { + required_bits = dwSharedIdBits; + required_bits *= m_dwNSharedObjsArray[i]; + if (!CanReadFromBitStream(hStream, required_bits)) + return FALSE; + + for (int j = 0; j < m_dwNSharedObjsArray[i]; j++) + m_dwIdentifierArray.Add(hStream->GetBits(dwSharedIdBits)); + } + hStream->ByteAlign(); + + for (int i = 0; i < nPages; i++) { + FX_SAFE_DWORD safeSize = m_dwNSharedObjsArray[i]; + safeSize *= dwSharedNumeratorBits; + if (!CanReadFromBitStream(hStream, safeSize)) + return FALSE; + + hStream->SkipBits(safeSize.ValueOrDie()); + } + hStream->ByteAlign(); + + FX_SAFE_DWORD safeTotalPageLen = pdfium::base::checked_cast<FX_DWORD>(nPages); + safeTotalPageLen *= dwDeltaPageLenBits; + if (!CanReadFromBitStream(hStream, safeTotalPageLen)) + return FALSE; + + hStream->SkipBits(safeTotalPageLen.ValueOrDie()); + hStream->ByteAlign(); + return TRUE; +} + +FX_BOOL CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream, + FX_DWORD offset) { + if (!hStream || hStream->IsEOF()) + return FALSE; + + int nStreamOffset = ReadPrimaryHintStreamOffset(); + int nStreamLen = ReadPrimaryHintStreamLength(); + if (nStreamOffset < 0 || nStreamLen < 1) + return FALSE; + + FX_SAFE_DWORD bit_offset = offset; + bit_offset *= 8; + if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie()) + return FALSE; + hStream->SkipBits(bit_offset.ValueOrDie() - hStream->GetPos()); + + const FX_DWORD kHeaderSize = 192; + if (hStream->BitsRemaining() < kHeaderSize) + return FALSE; + + // Item 1: The object number of the first object in the shared objects + // section. 
+ FX_DWORD dwFirstSharedObjNum = hStream->GetBits(32); + + // Item 2: The location of the first object in the shared objects section. + FX_DWORD dwFirstSharedObjLoc = hStream->GetBits(32); + if (dwFirstSharedObjLoc > nStreamOffset) + dwFirstSharedObjLoc += nStreamLen; + + // Item 3: The number of shared object entries for the first page. + m_nFirstPageSharedObjs = hStream->GetBits(32); + + // Item 4: The number of shared object entries for the shared objects + // section, including the number of shared object entries for the first page. + FX_DWORD dwSharedObjTotal = hStream->GetBits(32); + + // Item 5: The number of bits needed to represent the greatest number of + // objects in a shared object group. Skipped. + hStream->SkipBits(16); + + // Item 6: The least length of a shared object group in bytes. + FX_DWORD dwGroupLeastLen = hStream->GetBits(32); + + // Item 7: The number of bits needed to represent the difference between the + // greatest and least length of a shared object group, in bytes. + FX_DWORD dwDeltaGroupLen = hStream->GetBits(16); + CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetElementValue("O"); + int nFirstPageObjNum = pFirstPageObj ? 
pFirstPageObj->GetInteger() : -1; + if (nFirstPageObjNum < 0) + return FALSE; + + FX_DWORD dwPrevObjLen = 0; + FX_DWORD dwCurObjLen = 0; + FX_SAFE_DWORD required_bits = dwSharedObjTotal; + required_bits *= dwDeltaGroupLen; + if (!CanReadFromBitStream(hStream, required_bits)) + return FALSE; + + for (FX_DWORD i = 0; i < dwSharedObjTotal; ++i) { + dwPrevObjLen = dwCurObjLen; + FX_SAFE_DWORD safeObjLen = hStream->GetBits(dwDeltaGroupLen); + safeObjLen += dwGroupLeastLen; + if (!safeObjLen.IsValid()) + return FALSE; + + dwCurObjLen = safeObjLen.ValueOrDie(); + if (i < m_nFirstPageSharedObjs) { + m_dwSharedObjNumArray.Add(nFirstPageObjNum + i); + if (i == 0) + m_szSharedObjOffsetArray.push_back(m_szFirstPageObjOffset); + } else { + FX_SAFE_DWORD safeObjNum = dwFirstSharedObjNum; + safeObjNum += i - m_nFirstPageSharedObjs; + if (!safeObjNum.IsValid()) + return FALSE; + + m_dwSharedObjNumArray.Add(safeObjNum.ValueOrDie()); + if (i == m_nFirstPageSharedObjs) { + m_szSharedObjOffsetArray.push_back( + pdfium::base::checked_cast<int32_t>(dwFirstSharedObjLoc)); + } + } + + if (i != 0 && i != m_nFirstPageSharedObjs) { + FX_SAFE_INT32 safeLoc = pdfium::base::checked_cast<int32_t>(dwPrevObjLen); + safeLoc += m_szSharedObjOffsetArray[i - 1]; + if (!safeLoc.IsValid()) + return FALSE; + + m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie()); + } + } + + if (dwSharedObjTotal > 0) { + FX_SAFE_INT32 safeLoc = pdfium::base::checked_cast<int32_t>(dwCurObjLen); + safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1]; + if (!safeLoc.IsValid()) + return FALSE; + + m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie()); + } + + hStream->ByteAlign(); + if (hStream->BitsRemaining() < dwSharedObjTotal) + return FALSE; + + hStream->SkipBits(dwSharedObjTotal); + hStream->ByteAlign(); + return TRUE; +} + +FX_BOOL CPDF_HintTables::GetPagePos(int index, + FX_FILESIZE& szPageStartPos, + FX_FILESIZE& szPageLength, + FX_DWORD& dwObjNum) { + if (!m_pLinearizedDict) + return FALSE; + + 
szPageStartPos = m_szPageOffsetArray[index]; + szPageLength = GetItemLength(index, m_szPageOffsetArray); + + CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetElementValue("P"); + int nFirstPageNum = pFirstPageNum ? pFirstPageNum->GetInteger() : 0; + + CPDF_Object* pFirstPageObjNum = m_pLinearizedDict->GetElementValue("O"); + if (!pFirstPageObjNum) + return FALSE; + + int nFirstPageObjNum = pFirstPageObjNum->GetInteger(); + if (index == nFirstPageNum) { + dwObjNum = nFirstPageObjNum; + return TRUE; + } + + // The object number of remaining pages starts from 1. + dwObjNum = 1; + for (int i = 0; i < index; ++i) { + if (i == nFirstPageNum) + continue; + dwObjNum += m_dwDeltaNObjsArray[i]; + } + return TRUE; +} + +IPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage( + int index, + IPDF_DataAvail::DownloadHints* pHints) { + if (!m_pLinearizedDict || !pHints) + return IPDF_DataAvail::DataError; + + CPDF_Object* pFirstAvailPage = m_pLinearizedDict->GetElementValue("P"); + int nFirstAvailPage = pFirstAvailPage ? pFirstAvailPage->GetInteger() : 0; + if (index == nFirstAvailPage) + return IPDF_DataAvail::DataAvailable; + + FX_DWORD dwLength = GetItemLength(index, m_szPageOffsetArray); + // If two pages have the same offset, it should be treated as an error. + if (!dwLength) + return IPDF_DataAvail::DataError; + + if (!m_pDataAvail->IsDataAvail(m_szPageOffsetArray[index], dwLength, pHints)) + return IPDF_DataAvail::DataNotAvailable; + + // Download data of shared objects in the page. + FX_DWORD offset = 0; + for (int i = 0; i < index; ++i) + offset += m_dwNSharedObjsArray[i]; + + CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetElementValue("O"); + int nFirstPageObjNum = pFirstPageObj ? 
pFirstPageObj->GetInteger() : -1; + if (nFirstPageObjNum < 0) + return IPDF_DataAvail::DataError; + + FX_DWORD dwIndex = 0; + FX_DWORD dwObjNum = 0; + for (int j = 0; j < m_dwNSharedObjsArray[index]; ++j) { + dwIndex = m_dwIdentifierArray[offset + j]; + if (dwIndex >= m_dwSharedObjNumArray.GetSize()) + return IPDF_DataAvail::DataNotAvailable; + + dwObjNum = m_dwSharedObjNumArray[dwIndex]; + if (dwObjNum >= nFirstPageObjNum && + dwObjNum < nFirstPageObjNum + m_nFirstPageSharedObjs) { + continue; + } + + dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray); + // If two objects have the same offset, it should be treated as an error. + if (!dwLength) + return IPDF_DataAvail::DataError; + + if (!m_pDataAvail->IsDataAvail(m_szSharedObjOffsetArray[dwIndex], dwLength, + pHints)) { + return IPDF_DataAvail::DataNotAvailable; + } + } + return IPDF_DataAvail::DataAvailable; +} + +FX_BOOL CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) { + if (!pHintStream || !m_pLinearizedDict) + return FALSE; + + CPDF_Dictionary* pDict = pHintStream->GetDict(); + CPDF_Object* pOffset = pDict ? pDict->GetElement("S") : nullptr; + if (!pOffset || !pOffset->IsNumber()) + return FALSE; + + int shared_hint_table_offset = pOffset->GetInteger(); + CPDF_StreamAcc acc; + acc.LoadAllData(pHintStream); + + FX_DWORD size = acc.GetSize(); + // The header section of page offset hint table is 36 bytes. + // The header section of shared object hint table is 24 bytes. + // Hint table has at least 60 bytes. 
+ const FX_DWORD MIN_STREAM_LEN = 60; + if (size < MIN_STREAM_LEN || shared_hint_table_offset <= 0 || + size < shared_hint_table_offset) { + return FALSE; + } + + CFX_BitStream bs; + bs.Init(acc.GetData(), size); + return ReadPageHintTable(&bs) && + ReadSharedObjHintTable(&bs, pdfium::base::checked_cast<FX_DWORD>( + shared_hint_table_offset)); +} + +int CPDF_HintTables::ReadPrimaryHintStreamOffset() const { + if (!m_pLinearizedDict) + return -1; + + CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); + if (!pRange) + return -1; + + CPDF_Object* pStreamOffset = pRange->GetElementValue(0); + if (!pStreamOffset) + return -1; + + return pStreamOffset->GetInteger(); +} + +int CPDF_HintTables::ReadPrimaryHintStreamLength() const { + if (!m_pLinearizedDict) + return -1; + + CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); + if (!pRange) + return -1; + + CPDF_Object* pStreamLen = pRange->GetElementValue(1); + if (!pStreamLen) + return -1; + + return pStreamLen->GetInteger(); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_hint_tables.h b/core/fpdfapi/fpdf_parser/cpdf_hint_tables.h new file mode 100644 index 0000000000..a9a89b48bf --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_hint_tables.h @@ -0,0 +1,62 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#ifndef CORE_FPDFAPI_FPDF_PARSER_CPDF_HINT_TABLES_H_ +#define CORE_FPDFAPI_FPDF_PARSER_CPDF_HINT_TABLES_H_ + +#include <vector> + +#include "core/include/fpdfapi/ipdf_data_avail.h" +#include "core/include/fxcrt/fx_basic.h" +#include "core/include/fxcrt/fx_stream.h" + +class CFX_BitStream; +class CPDF_DataAvail; +class CPDF_Dictionary; +class CPDF_Stream; + +class CPDF_HintTables { + public: + CPDF_HintTables(CPDF_DataAvail* pDataAvail, CPDF_Dictionary* pLinearized) + : m_pLinearizedDict(pLinearized), + m_pDataAvail(pDataAvail), + m_nFirstPageSharedObjs(0), + m_szFirstPageObjOffset(0) {} + ~CPDF_HintTables(); + + FX_BOOL GetPagePos(int index, + FX_FILESIZE& szPageStartPos, + FX_FILESIZE& szPageLength, + FX_DWORD& dwObjNum); + + IPDF_DataAvail::DocAvailStatus CheckPage( + int index, + IPDF_DataAvail::DownloadHints* pHints); + + FX_BOOL LoadHintStream(CPDF_Stream* pHintStream); + + protected: + FX_BOOL ReadPageHintTable(CFX_BitStream* hStream); + FX_BOOL ReadSharedObjHintTable(CFX_BitStream* hStream, FX_DWORD offset); + FX_DWORD GetItemLength(int index, const std::vector<FX_FILESIZE>& szArray); + + private: + int ReadPrimaryHintStreamOffset() const; + int ReadPrimaryHintStreamLength() const; + + CPDF_Dictionary* m_pLinearizedDict; + CPDF_DataAvail* m_pDataAvail; + FX_DWORD m_nFirstPageSharedObjs; + FX_FILESIZE m_szFirstPageObjOffset; + CFX_DWordArray m_dwDeltaNObjsArray; + CFX_DWordArray m_dwNSharedObjsArray; + CFX_DWordArray m_dwSharedObjNumArray; + CFX_DWordArray m_dwIdentifierArray; + std::vector<FX_FILESIZE> m_szPageOffsetArray; + std::vector<FX_FILESIZE> m_szSharedObjOffsetArray; +}; + +#endif // CORE_FPDFAPI_FPDF_PARSER_CPDF_HINT_TABLES_H_ diff --git a/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp b/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp new file mode 100644 index 0000000000..b1606f9857 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp @@ -0,0 +1,80 @@ +// Copyright 2016 
PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_indirect_object_holder.h" +#include "core/include/fpdfapi/cpdf_object.h" +#include "core/include/fpdfapi/cpdf_parser.h" + +CPDF_IndirectObjectHolder::CPDF_IndirectObjectHolder(CPDF_Parser* pParser) + : m_pParser(pParser), m_LastObjNum(0) { + if (pParser) + m_LastObjNum = m_pParser->GetLastObjNum(); +} + +CPDF_IndirectObjectHolder::~CPDF_IndirectObjectHolder() { + for (const auto& pair : m_IndirectObjs) + pair.second->Destroy(); +} + +CPDF_Object* CPDF_IndirectObjectHolder::GetIndirectObject(FX_DWORD objnum) { + if (objnum == 0) + return nullptr; + + auto it = m_IndirectObjs.find(objnum); + if (it != m_IndirectObjs.end()) + return it->second->GetObjNum() != -1 ? it->second : nullptr; + + if (!m_pParser) + return nullptr; + + CPDF_Object* pObj = m_pParser->ParseIndirectObject(this, objnum); + if (!pObj) + return nullptr; + + pObj->m_ObjNum = objnum; + m_LastObjNum = std::max(m_LastObjNum, objnum); + if (m_IndirectObjs[objnum]) + m_IndirectObjs[objnum]->Destroy(); + + m_IndirectObjs[objnum] = pObj; + return pObj; +} + +FX_DWORD CPDF_IndirectObjectHolder::AddIndirectObject(CPDF_Object* pObj) { + if (pObj->m_ObjNum) + return pObj->m_ObjNum; + + m_LastObjNum++; + m_IndirectObjs[m_LastObjNum] = pObj; + pObj->m_ObjNum = m_LastObjNum; + return m_LastObjNum; +} + +void CPDF_IndirectObjectHolder::ReleaseIndirectObject(FX_DWORD objnum) { + auto it = m_IndirectObjs.find(objnum); + if (it == m_IndirectObjs.end() || it->second->GetObjNum() == -1) + return; + it->second->Destroy(); + m_IndirectObjs.erase(it); +} + +FX_BOOL CPDF_IndirectObjectHolder::InsertIndirectObject(FX_DWORD objnum, + CPDF_Object* pObj) { + if (!objnum || !pObj) + return FALSE; + auto it = m_IndirectObjs.find(objnum); + if (it != 
m_IndirectObjs.end()) { + if (pObj->GetGenNum() <= it->second->GetGenNum()) { + pObj->Destroy(); + return FALSE; + } + it->second->Destroy(); + } + pObj->m_ObjNum = objnum; + m_IndirectObjs[objnum] = pObj; + m_LastObjNum = std::max(m_LastObjNum, objnum); + return TRUE; +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_name.cpp b/core/fpdfapi/fpdf_parser/cpdf_name.cpp new file mode 100644 index 0000000000..03d523db19 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_name.cpp @@ -0,0 +1,53 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_name.h" + +#include "core/include/fpdfapi/fpdf_parser_decode.h" + +CPDF_Name::CPDF_Name(const CFX_ByteString& str) : m_Name(str) {} + +CPDF_Name::CPDF_Name(const CFX_ByteStringC& str) : m_Name(str) {} + +CPDF_Name::CPDF_Name(const FX_CHAR* str) : m_Name(str) {} + +CPDF_Name::~CPDF_Name() {} + +CPDF_Object::Type CPDF_Name::GetType() const { + return NAME; +} + +CPDF_Object* CPDF_Name::Clone(FX_BOOL bDirect) const { + return new CPDF_Name(m_Name); +} + +CFX_ByteString CPDF_Name::GetString() const { + return m_Name; +} + +CFX_ByteStringC CPDF_Name::GetConstString() const { + return CFX_ByteStringC(m_Name); +} + +void CPDF_Name::SetString(const CFX_ByteString& str) { + m_Name = str; +} + +bool CPDF_Name::IsName() const { + return true; +} + +CPDF_Name* CPDF_Name::AsName() { + return this; +} + +const CPDF_Name* CPDF_Name::AsName() const { + return this; +} + +CFX_WideString CPDF_Name::GetUnicodeText() const { + return PDF_DecodeText(m_Name); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_null.cpp b/core/fpdfapi/fpdf_parser/cpdf_null.cpp new file mode 100644 index 0000000000..c18449d08f --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_null.cpp @@ -0,0 +1,17 @@ +// Copyright 2016 PDFium Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_null.h" + +CPDF_Null::CPDF_Null() {} + +CPDF_Object::Type CPDF_Null::GetType() const { + return NULLOBJ; +} + +CPDF_Object* CPDF_Null::Clone(FX_BOOL bDirect) const { + return new CPDF_Null; +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_number.cpp b/core/fpdfapi/fpdf_parser/cpdf_number.cpp new file mode 100644 index 0000000000..ddc7aa11a2 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_number.cpp @@ -0,0 +1,56 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_number.h" + +CPDF_Number::CPDF_Number() : m_bInteger(TRUE), m_Integer(0) {} + +CPDF_Number::CPDF_Number(int value) : m_bInteger(TRUE), m_Integer(value) {} + +CPDF_Number::CPDF_Number(FX_FLOAT value) : m_bInteger(FALSE), m_Float(value) {} + +CPDF_Number::CPDF_Number(const CFX_ByteStringC& str) { + FX_atonum(str, m_bInteger, &m_Integer); +} + +CPDF_Number::~CPDF_Number() {} + +CPDF_Object::Type CPDF_Number::GetType() const { + return NUMBER; +} + +CPDF_Object* CPDF_Number::Clone(FX_BOOL bDirect) const { + return m_bInteger ? new CPDF_Number(m_Integer) : new CPDF_Number(m_Float); +} + +FX_FLOAT CPDF_Number::GetNumber() const { + return m_bInteger ? static_cast<FX_FLOAT>(m_Integer) : m_Float; +} + +int CPDF_Number::GetInteger() const { + return m_bInteger ? 
m_Integer : static_cast<int>(m_Float); +} + +bool CPDF_Number::IsNumber() const { + return true; +} + +CPDF_Number* CPDF_Number::AsNumber() { + return this; +} + +const CPDF_Number* CPDF_Number::AsNumber() const { + return this; +} + +void CPDF_Number::SetString(const CFX_ByteString& str) { + FX_atonum(str, m_bInteger, &m_Integer); +} + +CFX_ByteString CPDF_Number::GetString() const { + return m_bInteger ? CFX_ByteString::FormatInteger(m_Integer, FXFORMAT_SIGNED) + : CFX_ByteString::FormatFloat(m_Float); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_object.cpp b/core/fpdfapi/fpdf_parser/cpdf_object.cpp new file mode 100644 index 0000000000..208bca8e6c --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_object.cpp @@ -0,0 +1,158 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_object.h" + +#include <algorithm> + +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_indirect_object_holder.h" +#include "core/include/fpdfapi/cpdf_parser.h" +#include "core/include/fpdfapi/fpdf_parser_decode.h" +#include "core/include/fxcrt/fx_string.h" +#include "third_party/base/stl_util.h" + +CPDF_Object::~CPDF_Object() {} + +CPDF_Object* CPDF_Object::GetDirect() const { + return const_cast<CPDF_Object*>(this); +} + +void CPDF_Object::Release() { + if (m_ObjNum) + return; + + Destroy(); +} + +CFX_ByteString CPDF_Object::GetString() const { + return CFX_ByteString(); +} + +CFX_ByteStringC CPDF_Object::GetConstString() const { + return CFX_ByteStringC(); +} + +CFX_WideString CPDF_Object::GetUnicodeText() const { + return CFX_WideString(); +} + +FX_FLOAT CPDF_Object::GetNumber() const { + return 0; +} + +int CPDF_Object::GetInteger() const { + return 0; +} + +CPDF_Dictionary* 
CPDF_Object::GetDict() const { + return nullptr; +} + +CPDF_Array* CPDF_Object::GetArray() const { + return nullptr; +} + +void CPDF_Object::SetString(const CFX_ByteString& str) { + ASSERT(FALSE); +} + +bool CPDF_Object::IsArray() const { + return false; +} + +bool CPDF_Object::IsBoolean() const { + return false; +} + +bool CPDF_Object::IsDictionary() const { + return false; +} + +bool CPDF_Object::IsName() const { + return false; +} + +bool CPDF_Object::IsNumber() const { + return false; +} + +bool CPDF_Object::IsReference() const { + return false; +} + +bool CPDF_Object::IsStream() const { + return false; +} + +bool CPDF_Object::IsString() const { + return false; +} + +CPDF_Array* CPDF_Object::AsArray() { + return nullptr; +} + +const CPDF_Array* CPDF_Object::AsArray() const { + return nullptr; +} + +CPDF_Boolean* CPDF_Object::AsBoolean() { + return nullptr; +} + +const CPDF_Boolean* CPDF_Object::AsBoolean() const { + return nullptr; +} + +CPDF_Dictionary* CPDF_Object::AsDictionary() { + return nullptr; +} + +const CPDF_Dictionary* CPDF_Object::AsDictionary() const { + return nullptr; +} + +CPDF_Name* CPDF_Object::AsName() { + return nullptr; +} + +const CPDF_Name* CPDF_Object::AsName() const { + return nullptr; +} + +CPDF_Number* CPDF_Object::AsNumber() { + return nullptr; +} + +const CPDF_Number* CPDF_Object::AsNumber() const { + return nullptr; +} + +CPDF_Reference* CPDF_Object::AsReference() { + return nullptr; +} + +const CPDF_Reference* CPDF_Object::AsReference() const { + return nullptr; +} + +CPDF_Stream* CPDF_Object::AsStream() { + return nullptr; +} + +const CPDF_Stream* CPDF_Object::AsStream() const { + return nullptr; +} + +CPDF_String* CPDF_Object::AsString() { + return nullptr; +} + +const CPDF_String* CPDF_Object::AsString() const { + return nullptr; +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_object_unittest.cpp b/core/fpdfapi/fpdf_parser/cpdf_object_unittest.cpp new file mode 100644 index 0000000000..971bdaf20e --- /dev/null +++ 
b/core/fpdfapi/fpdf_parser/cpdf_object_unittest.cpp @@ -0,0 +1,776 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_boolean.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_name.h" +#include "core/include/fpdfapi/cpdf_null.h" +#include "core/include/fpdfapi/cpdf_number.h" +#include "core/include/fpdfapi/cpdf_reference.h" +#include "core/include/fpdfapi/cpdf_stream.h" +#include "core/include/fpdfapi/cpdf_string.h" + +#include <memory> +#include <string> +#include <vector> + +#include "core/include/fpdfapi/cpdf_indirect_object_holder.h" +#include "core/include/fxcrt/fx_basic.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace { + +using ScopedArray = std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>>; + +void TestArrayAccessors(const CPDF_Array* arr, + size_t index, + const char* str_val, + const char* const_str_val, + int int_val, + float float_val, + CPDF_Array* arr_val, + CPDF_Dictionary* dict_val, + CPDF_Stream* stream_val) { + EXPECT_STREQ(str_val, arr->GetStringAt(index).c_str()); + EXPECT_STREQ(const_str_val, arr->GetConstStringAt(index).GetCStr()); + EXPECT_EQ(int_val, arr->GetIntegerAt(index)); + EXPECT_EQ(float_val, arr->GetNumberAt(index)); + EXPECT_EQ(float_val, arr->GetFloatAt(index)); + EXPECT_EQ(arr_val, arr->GetArrayAt(index)); + EXPECT_EQ(dict_val, arr->GetDictAt(index)); + EXPECT_EQ(stream_val, arr->GetStreamAt(index)); +} + +} // namespace + +class PDFObjectsTest : public testing::Test { + public: + void SetUp() override { + // Initialize different kinds of objects. + // Boolean objects. + CPDF_Boolean* boolean_false_obj = new CPDF_Boolean(false); + CPDF_Boolean* boolean_true_obj = new CPDF_Boolean(true); + // Number objects. 
+ CPDF_Number* number_int_obj = new CPDF_Number(1245); + CPDF_Number* number_float_obj = new CPDF_Number(9.00345f); + // String objects. + CPDF_String* str_reg_obj = new CPDF_String(L"A simple test"); + CPDF_String* str_spec_obj = new CPDF_String(L"\t\n"); + // Name object. + CPDF_Name* name_obj = new CPDF_Name("space"); + // Array object. + m_ArrayObj = new CPDF_Array; + m_ArrayObj->InsertAt(0, new CPDF_Number(8902)); + m_ArrayObj->InsertAt(1, new CPDF_Name("address")); + // Dictionary object. + m_DictObj = new CPDF_Dictionary; + m_DictObj->SetAt("bool", new CPDF_Boolean(false)); + m_DictObj->SetAt("num", new CPDF_Number(0.23f)); + // Stream object. + const char content[] = "abcdefghijklmnopqrstuvwxyz"; + size_t buf_len = FX_ArraySize(content); + uint8_t* buf = reinterpret_cast<uint8_t*>(malloc(buf_len)); + memcpy(buf, content, buf_len); + m_StreamDictObj = new CPDF_Dictionary; + m_StreamDictObj->SetAt("key1", new CPDF_String(L" test dict")); + m_StreamDictObj->SetAt("key2", new CPDF_Number(-1)); + CPDF_Stream* stream_obj = new CPDF_Stream(buf, buf_len, m_StreamDictObj); + // Null Object. + CPDF_Null* null_obj = new CPDF_Null; + // All direct objects. + CPDF_Object* objs[] = {boolean_false_obj, boolean_true_obj, number_int_obj, + number_float_obj, str_reg_obj, str_spec_obj, + name_obj, m_ArrayObj, m_DictObj, + stream_obj, null_obj}; + m_DirectObjTypes = { + CPDF_Object::BOOLEAN, CPDF_Object::BOOLEAN, CPDF_Object::NUMBER, + CPDF_Object::NUMBER, CPDF_Object::STRING, CPDF_Object::STRING, + CPDF_Object::NAME, CPDF_Object::ARRAY, CPDF_Object::DICTIONARY, + CPDF_Object::STREAM, CPDF_Object::NULLOBJ}; + for (size_t i = 0; i < FX_ArraySize(objs); ++i) + m_DirectObjs.emplace_back(objs[i]); + + // Indirect references to indirect objects. 
+ m_ObjHolder.reset(new CPDF_IndirectObjectHolder(nullptr)); + m_IndirectObjs = {boolean_true_obj, number_int_obj, str_spec_obj, name_obj, + m_ArrayObj, m_DictObj, stream_obj}; + for (size_t i = 0; i < m_IndirectObjs.size(); ++i) { + m_ObjHolder->AddIndirectObject(m_IndirectObjs[i]); + m_RefObjs.emplace_back(new CPDF_Reference( + m_ObjHolder.get(), m_IndirectObjs[i]->GetObjNum())); + } + } + + bool Equal(CPDF_Object* obj1, CPDF_Object* obj2) { + if (obj1 == obj2) + return true; + if (!obj1 || !obj2 || obj1->GetType() != obj2->GetType()) + return false; + switch (obj1->GetType()) { + case CPDF_Object::BOOLEAN: + return obj1->GetInteger() == obj2->GetInteger(); + case CPDF_Object::NUMBER: + return obj1->AsNumber()->IsInteger() == obj2->AsNumber()->IsInteger() && + obj1->GetInteger() == obj2->GetInteger(); + case CPDF_Object::STRING: + case CPDF_Object::NAME: + return obj1->GetString() == obj2->GetString(); + case CPDF_Object::ARRAY: { + const CPDF_Array* array1 = obj1->AsArray(); + const CPDF_Array* array2 = obj2->AsArray(); + if (array1->GetCount() != array2->GetCount()) + return false; + for (size_t i = 0; i < array1->GetCount(); ++i) { + if (!Equal(array1->GetElement(i), array2->GetElement(i))) + return false; + } + return true; + } + case CPDF_Object::DICTIONARY: { + const CPDF_Dictionary* dict1 = obj1->AsDictionary(); + const CPDF_Dictionary* dict2 = obj2->AsDictionary(); + if (dict1->GetCount() != dict2->GetCount()) + return false; + for (CPDF_Dictionary::const_iterator it = dict1->begin(); + it != dict1->end(); ++it) { + if (!Equal(it->second, dict2->GetElement(it->first))) + return false; + } + return true; + } + case CPDF_Object::NULLOBJ: + return true; + case CPDF_Object::STREAM: { + const CPDF_Stream* stream1 = obj1->AsStream(); + const CPDF_Stream* stream2 = obj2->AsStream(); + if (!stream1->GetDict() && !stream2->GetDict()) + return true; + // Compare dictionaries. + if (!Equal(stream1->GetDict(), stream2->GetDict())) + return false; + // Compare sizes. 
+ if (stream1->GetRawSize() != stream2->GetRawSize()) + return false; + // Compare contents. + // Since this function is used for testing Clone(), only memory based + // streams need to be handled. + if (!stream1->IsMemoryBased() || !stream2->IsMemoryBased()) + return false; + return FXSYS_memcmp(stream1->GetRawData(), stream2->GetRawData(), + stream1->GetRawSize()) == 0; + } + case CPDF_Object::REFERENCE: + return obj1->AsReference()->GetRefObjNum() == + obj2->AsReference()->GetRefObjNum(); + } + return false; + } + + protected: + using ScopedObj = std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>>; + + // m_ObjHolder needs to be declared first and destructed last since it also + // refers to some objects in m_DirectObjs. + std::unique_ptr<CPDF_IndirectObjectHolder> m_ObjHolder; + std::vector<ScopedObj> m_DirectObjs; + std::vector<int> m_DirectObjTypes; + std::vector<ScopedObj> m_RefObjs; + CPDF_Dictionary* m_DictObj; + CPDF_Dictionary* m_StreamDictObj; + CPDF_Array* m_ArrayObj; + std::vector<CPDF_Object*> m_IndirectObjs; +}; + +TEST_F(PDFObjectsTest, GetString) { + const char* direct_obj_results[] = { + "false", "true", "1245", "9.00345", "A simple test", "\t\n", "space", + "", "", "", ""}; + // Check for direct objects. + for (size_t i = 0; i < m_DirectObjs.size(); ++i) + EXPECT_STREQ(direct_obj_results[i], m_DirectObjs[i]->GetString().c_str()); + + // Check indirect references. + const char* indirect_obj_results[] = {"true", "1245", "\t\n", "space", + "", "", ""}; + for (size_t i = 0; i < m_RefObjs.size(); ++i) { + EXPECT_STREQ(indirect_obj_results[i], m_RefObjs[i]->GetString().c_str()); + } +} + +TEST_F(PDFObjectsTest, GetConstString) { + const char* direct_obj_results[] = { + nullptr, nullptr, nullptr, nullptr, "A simple test", "\t\n", + "space", nullptr, nullptr, nullptr, nullptr}; + // Check for direct objects. 
+ for (size_t i = 0; i < m_DirectObjs.size(); ++i) { + if (!direct_obj_results[i]) { + EXPECT_EQ(direct_obj_results[i], + m_DirectObjs[i]->GetConstString().GetCStr()); + } else { + EXPECT_STREQ(direct_obj_results[i], + m_DirectObjs[i]->GetConstString().GetCStr()); + } + } + // Check indirect references. + const char* indirect_obj_results[] = {nullptr, nullptr, "\t\n", "space", + nullptr, nullptr, nullptr}; + for (size_t i = 0; i < m_RefObjs.size(); ++i) { + if (!indirect_obj_results[i]) { + EXPECT_EQ(nullptr, m_RefObjs[i]->GetConstString().GetCStr()); + } else { + EXPECT_STREQ(indirect_obj_results[i], + m_RefObjs[i]->GetConstString().GetCStr()); + } + } +} + +TEST_F(PDFObjectsTest, GetUnicodeText) { + const wchar_t* direct_obj_results[] = { + L"", L"", L"", L"", L"A simple test", + L"\t\n", L"space", L"", L"", L"abcdefghijklmnopqrstuvwxyz", + L""}; + // Check for direct objects. + for (size_t i = 0; i < m_DirectObjs.size(); ++i) + EXPECT_STREQ(direct_obj_results[i], + m_DirectObjs[i]->GetUnicodeText().c_str()); + + // Check indirect references. + for (const auto& it : m_RefObjs) + EXPECT_STREQ(L"", it->GetUnicodeText().c_str()); +} + +TEST_F(PDFObjectsTest, GetNumber) { + const FX_FLOAT direct_obj_results[] = {0, 0, 1245, 9.00345f, 0, 0, + 0, 0, 0, 0, 0}; + // Check for direct objects. + for (size_t i = 0; i < m_DirectObjs.size(); ++i) + EXPECT_EQ(direct_obj_results[i], m_DirectObjs[i]->GetNumber()); + + // Check indirect references. + const FX_FLOAT indirect_obj_results[] = {0, 1245, 0, 0, 0, 0, 0}; + for (size_t i = 0; i < m_RefObjs.size(); ++i) + EXPECT_EQ(indirect_obj_results[i], m_RefObjs[i]->GetNumber()); +} + +TEST_F(PDFObjectsTest, GetInteger) { + const int direct_obj_results[] = {0, 1, 1245, 9, 0, 0, 0, 0, 0, 0, 0}; + // Check for direct objects. + for (size_t i = 0; i < m_DirectObjs.size(); ++i) + EXPECT_EQ(direct_obj_results[i], m_DirectObjs[i]->GetInteger()); + + // Check indirect references. 
+ const int indirect_obj_results[] = {1, 1245, 0, 0, 0, 0, 0}; + for (size_t i = 0; i < m_RefObjs.size(); ++i) + EXPECT_EQ(indirect_obj_results[i], m_RefObjs[i]->GetInteger()); +} + +TEST_F(PDFObjectsTest, GetDict) { + const CPDF_Dictionary* direct_obj_results[] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, m_DictObj, m_StreamDictObj, nullptr}; + // Check for direct objects. + for (size_t i = 0; i < m_DirectObjs.size(); ++i) + EXPECT_EQ(direct_obj_results[i], m_DirectObjs[i]->GetDict()); + + // Check indirect references. + const CPDF_Dictionary* indirect_obj_results[] = { + nullptr, nullptr, nullptr, nullptr, nullptr, m_DictObj, m_StreamDictObj}; + for (size_t i = 0; i < m_RefObjs.size(); ++i) + EXPECT_EQ(indirect_obj_results[i], m_RefObjs[i]->GetDict()); +} + +TEST_F(PDFObjectsTest, GetArray) { + const CPDF_Array* direct_obj_results[] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, m_ArrayObj, nullptr, nullptr, nullptr}; + // Check for direct objects. + for (size_t i = 0; i < m_DirectObjs.size(); ++i) + EXPECT_EQ(direct_obj_results[i], m_DirectObjs[i]->GetArray()); + + // Check indirect references. + for (const auto& it : m_RefObjs) + EXPECT_EQ(nullptr, it->GetArray()); +} + +TEST_F(PDFObjectsTest, Clone) { + // Check for direct objects. + for (size_t i = 0; i < m_DirectObjs.size(); ++i) { + ScopedObj obj(m_DirectObjs[i]->Clone()); + EXPECT_TRUE(Equal(m_DirectObjs[i].get(), obj.get())); + } + + // Check indirect references. + for (const auto& it : m_RefObjs) { + ScopedObj obj(it->Clone()); + EXPECT_TRUE(Equal(it.get(), obj.get())); + } +} + +TEST_F(PDFObjectsTest, GetType) { + // Check for direct objects. + for (size_t i = 0; i < m_DirectObjs.size(); ++i) + EXPECT_EQ(m_DirectObjTypes[i], m_DirectObjs[i]->GetType()); + + // Check indirect references. + for (const auto& it : m_RefObjs) + EXPECT_EQ(CPDF_Object::REFERENCE, it->GetType()); +} + +TEST_F(PDFObjectsTest, GetDirect) { + // Check for direct objects. 
+ for (size_t i = 0; i < m_DirectObjs.size(); ++i) + EXPECT_EQ(m_DirectObjs[i].get(), m_DirectObjs[i]->GetDirect()); + + // Check indirect references. + for (size_t i = 0; i < m_RefObjs.size(); ++i) + EXPECT_EQ(m_IndirectObjs[i], m_RefObjs[i]->GetDirect()); +} + +TEST_F(PDFObjectsTest, SetString) { + // Check for direct objects. + const char* const set_values[] = {"true", "fake", "3.125f", "097", + "changed", "", "NewName"}; + const char* expected[] = {"true", "false", "3.125", "97", + "changed", "", "NewName"}; + for (size_t i = 0; i < FX_ArraySize(set_values); ++i) { + m_DirectObjs[i]->SetString(set_values[i]); + EXPECT_STREQ(expected[i], m_DirectObjs[i]->GetString().c_str()); + } +} + +TEST_F(PDFObjectsTest, IsTypeAndAsType) { + // Check for direct objects. + for (size_t i = 0; i < m_DirectObjs.size(); ++i) { + if (m_DirectObjTypes[i] == CPDF_Object::ARRAY) { + EXPECT_TRUE(m_DirectObjs[i]->IsArray()); + EXPECT_EQ(m_DirectObjs[i].get(), m_DirectObjs[i]->AsArray()); + } else { + EXPECT_FALSE(m_DirectObjs[i]->IsArray()); + EXPECT_EQ(nullptr, m_DirectObjs[i]->AsArray()); + } + + if (m_DirectObjTypes[i] == CPDF_Object::BOOLEAN) { + EXPECT_TRUE(m_DirectObjs[i]->IsBoolean()); + EXPECT_EQ(m_DirectObjs[i].get(), m_DirectObjs[i]->AsBoolean()); + } else { + EXPECT_FALSE(m_DirectObjs[i]->IsBoolean()); + EXPECT_EQ(nullptr, m_DirectObjs[i]->AsBoolean()); + } + + if (m_DirectObjTypes[i] == CPDF_Object::NAME) { + EXPECT_TRUE(m_DirectObjs[i]->IsName()); + EXPECT_EQ(m_DirectObjs[i].get(), m_DirectObjs[i]->AsName()); + } else { + EXPECT_FALSE(m_DirectObjs[i]->IsName()); + EXPECT_EQ(nullptr, m_DirectObjs[i]->AsName()); + } + + if (m_DirectObjTypes[i] == CPDF_Object::NUMBER) { + EXPECT_TRUE(m_DirectObjs[i]->IsNumber()); + EXPECT_EQ(m_DirectObjs[i].get(), m_DirectObjs[i]->AsNumber()); + } else { + EXPECT_FALSE(m_DirectObjs[i]->IsNumber()); + EXPECT_EQ(nullptr, m_DirectObjs[i]->AsNumber()); + } + + if (m_DirectObjTypes[i] == CPDF_Object::STRING) { + 
EXPECT_TRUE(m_DirectObjs[i]->IsString()); + EXPECT_EQ(m_DirectObjs[i].get(), m_DirectObjs[i]->AsString()); + } else { + EXPECT_FALSE(m_DirectObjs[i]->IsString()); + EXPECT_EQ(nullptr, m_DirectObjs[i]->AsString()); + } + + if (m_DirectObjTypes[i] == CPDF_Object::DICTIONARY) { + EXPECT_TRUE(m_DirectObjs[i]->IsDictionary()); + EXPECT_EQ(m_DirectObjs[i].get(), m_DirectObjs[i]->AsDictionary()); + } else { + EXPECT_FALSE(m_DirectObjs[i]->IsDictionary()); + EXPECT_EQ(nullptr, m_DirectObjs[i]->AsDictionary()); + } + + if (m_DirectObjTypes[i] == CPDF_Object::STREAM) { + EXPECT_TRUE(m_DirectObjs[i]->IsStream()); + EXPECT_EQ(m_DirectObjs[i].get(), m_DirectObjs[i]->AsStream()); + } else { + EXPECT_FALSE(m_DirectObjs[i]->IsStream()); + EXPECT_EQ(nullptr, m_DirectObjs[i]->AsStream()); + } + + EXPECT_FALSE(m_DirectObjs[i]->IsReference()); + EXPECT_EQ(nullptr, m_DirectObjs[i]->AsReference()); + } + // Check indirect references. + for (size_t i = 0; i < m_RefObjs.size(); ++i) { + EXPECT_TRUE(m_RefObjs[i]->IsReference()); + EXPECT_EQ(m_RefObjs[i].get(), m_RefObjs[i]->AsReference()); + } +} + +TEST(PDFArrayTest, GetMatrix) { + float elems[][6] = {{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, + {1, 2, 3, 4, 5, 6}, + {2.3f, 4.05f, 3, -2, -3, 0.0f}, + {0.05f, 0.1f, 0.56f, 0.67f, 1.34f, 99.9f}}; + for (size_t i = 0; i < FX_ArraySize(elems); ++i) { + ScopedArray arr(new CPDF_Array); + CFX_Matrix matrix(elems[i][0], elems[i][1], elems[i][2], elems[i][3], + elems[i][4], elems[i][5]); + for (size_t j = 0; j < 6; ++j) + arr->AddNumber(elems[i][j]); + CFX_Matrix arr_matrix = arr->GetMatrix(); + EXPECT_EQ(matrix.GetA(), arr_matrix.GetA()); + EXPECT_EQ(matrix.GetB(), arr_matrix.GetB()); + EXPECT_EQ(matrix.GetC(), arr_matrix.GetC()); + EXPECT_EQ(matrix.GetD(), arr_matrix.GetD()); + EXPECT_EQ(matrix.GetE(), arr_matrix.GetE()); + EXPECT_EQ(matrix.GetF(), arr_matrix.GetF()); + } +} + +TEST(PDFArrayTest, GetRect) { + float elems[][4] = {{0.0f, 0.0f, 0.0f, 0.0f}, + {1, 2, 5, 6}, + {2.3f, 4.05f, -3, 0.0f}, + 
{0.05f, 0.1f, 1.34f, 99.9f}}; + for (size_t i = 0; i < FX_ArraySize(elems); ++i) { + ScopedArray arr(new CPDF_Array); + CFX_FloatRect rect(elems[i]); + for (size_t j = 0; j < 4; ++j) + arr->AddNumber(elems[i][j]); + CFX_FloatRect arr_rect = arr->GetRect(); + EXPECT_EQ(rect.left, arr_rect.left); + EXPECT_EQ(rect.right, arr_rect.right); + EXPECT_EQ(rect.bottom, arr_rect.bottom); + EXPECT_EQ(rect.top, arr_rect.top); + } +} + +TEST(PDFArrayTest, GetTypeAt) { + { + // Boolean array. + bool vals[] = {true, false, false, true, true}; + ScopedArray arr(new CPDF_Array); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) + arr->InsertAt(i, new CPDF_Boolean(vals[i])); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) { + TestArrayAccessors(arr.get(), i, // Array and index. + vals[i] ? "true" : "false", // String value. + nullptr, // Const string value. + vals[i] ? 1 : 0, // Integer value. + 0, // Float value. + nullptr, // Array value. + nullptr, // Dictionary value. + nullptr); // Stream value. + } + } + { + // Integer array. + int vals[] = {10, 0, -345, 2089345456, -1000000000, 567, 93658767}; + ScopedArray arr(new CPDF_Array); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) + arr->InsertAt(i, new CPDF_Number(vals[i])); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) { + char buf[33]; + TestArrayAccessors(arr.get(), i, // Array and index. + FXSYS_itoa(vals[i], buf, 10), // String value. + nullptr, // Const string value. + vals[i], // Integer value. + vals[i], // Float value. + nullptr, // Array value. + nullptr, // Dictionary value. + nullptr); // Stream value. + } + } + { + // Float array. 
+ float vals[] = {0.0f, 0, 10, 10.0f, 0.0345f, + 897.34f, -2.5f, -1.0f, -345.0f, -0.0f}; + const char* expected_str[] = {"0", "0", "10", "10", "0.0345", + "897.34", "-2.5", "-1", "-345", "0"}; + ScopedArray arr(new CPDF_Array); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) { + arr->InsertAt(i, new CPDF_Number(vals[i])); + } + for (size_t i = 0; i < FX_ArraySize(vals); ++i) { + TestArrayAccessors(arr.get(), i, // Array and index. + expected_str[i], // String value. + nullptr, // Const string value. + vals[i], // Integer value. + vals[i], // Float value. + nullptr, // Array value. + nullptr, // Dictionary value. + nullptr); // Stream value. + } + } + { + // String and name array + const char* const vals[] = {"this", "adsde$%^", "\r\t", "\"012", + ".", "EYREW", "It is a joke :)"}; + ScopedArray string_array(new CPDF_Array); + ScopedArray name_array(new CPDF_Array); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) { + string_array->InsertAt(i, new CPDF_String(vals[i], false)); + name_array->InsertAt(i, new CPDF_Name(vals[i])); + } + for (size_t i = 0; i < FX_ArraySize(vals); ++i) { + TestArrayAccessors(string_array.get(), i, // Array and index. + vals[i], // String value. + vals[i], // Const string value. + 0, // Integer value. + 0, // Float value. + nullptr, // Array value. + nullptr, // Dictionary value. + nullptr); // Stream value. + TestArrayAccessors(name_array.get(), i, // Array and index. + vals[i], // String value. + vals[i], // Const string value. + 0, // Integer value. + 0, // Float value. + nullptr, // Array value. + nullptr, // Dictionary value. + nullptr); // Stream value. + } + } + { + // Null element array. + ScopedArray arr(new CPDF_Array); + for (size_t i = 0; i < 3; ++i) + arr->InsertAt(i, new CPDF_Null); + for (size_t i = 0; i < 3; ++i) { + TestArrayAccessors(arr.get(), i, // Array and index. + "", // String value. + nullptr, // Const string value. + 0, // Integer value. + 0, // Float value. + nullptr, // Array value. 
+ nullptr, // Dictionary value. + nullptr); // Stream value. + } + } + { + // Array of array. + CPDF_Array* vals[3]; + ScopedArray arr(new CPDF_Array); + for (size_t i = 0; i < 3; ++i) { + vals[i] = new CPDF_Array; + for (size_t j = 0; j < 3; ++j) { + int value = j + 100; + vals[i]->InsertAt(i, new CPDF_Number(value)); + } + arr->InsertAt(i, vals[i]); + } + for (size_t i = 0; i < 3; ++i) { + TestArrayAccessors(arr.get(), i, // Array and index. + "", // String value. + nullptr, // Const string value. + 0, // Integer value. + 0, // Float value. + vals[i], // Array value. + nullptr, // Dictionary value. + nullptr); // Stream value. + } + } + { + // Dictionary array. + CPDF_Dictionary* vals[3]; + ScopedArray arr(new CPDF_Array); + for (size_t i = 0; i < 3; ++i) { + vals[i] = new CPDF_Dictionary; + for (size_t j = 0; j < 3; ++j) { + std::string key("key"); + char buf[33]; + key.append(FXSYS_itoa(j, buf, 10)); + int value = j + 200; + vals[i]->SetAt(CFX_ByteStringC(key.c_str()), new CPDF_Number(value)); + } + arr->InsertAt(i, vals[i]); + } + for (size_t i = 0; i < 3; ++i) { + TestArrayAccessors(arr.get(), i, // Array and index. + "", // String value. + nullptr, // Const string value. + 0, // Integer value. + 0, // Float value. + nullptr, // Array value. + vals[i], // Dictionary value. + nullptr); // Stream value. + } + } + { + // Stream array. 
+ CPDF_Dictionary* vals[3]; + CPDF_Stream* stream_vals[3]; + ScopedArray arr(new CPDF_Array); + for (size_t i = 0; i < 3; ++i) { + vals[i] = new CPDF_Dictionary; + for (size_t j = 0; j < 3; ++j) { + std::string key("key"); + char buf[33]; + key.append(FXSYS_itoa(j, buf, 10)); + int value = j + 200; + vals[i]->SetAt(CFX_ByteStringC(key.c_str()), new CPDF_Number(value)); + } + uint8_t content[] = "content: this is a stream"; + size_t data_size = FX_ArraySize(content); + uint8_t* data = reinterpret_cast<uint8_t*>(malloc(data_size)); + memcpy(data, content, data_size); + stream_vals[i] = new CPDF_Stream(data, data_size, vals[i]); + arr->InsertAt(i, stream_vals[i]); + } + for (size_t i = 0; i < 3; ++i) { + TestArrayAccessors(arr.get(), i, // Array and index. + "", // String value. + nullptr, // Const string value. + 0, // Integer value. + 0, // Float value. + nullptr, // Array value. + vals[i], // Dictionary value. + stream_vals[i]); // Stream value. + } + } + { + // Mixed array. + ScopedArray arr(new CPDF_Array); + // Array arr will take ownership of all the objects inserted. 
+ arr->InsertAt(0, new CPDF_Boolean(true)); + arr->InsertAt(1, new CPDF_Boolean(false)); + arr->InsertAt(2, new CPDF_Number(0)); + arr->InsertAt(3, new CPDF_Number(-1234)); + arr->InsertAt(4, new CPDF_Number(2345.0f)); + arr->InsertAt(5, new CPDF_Number(0.05f)); + arr->InsertAt(6, new CPDF_String("", false)); + arr->InsertAt(7, new CPDF_String("It is a test!", false)); + arr->InsertAt(8, new CPDF_Name("NAME")); + arr->InsertAt(9, new CPDF_Name("test")); + arr->InsertAt(10, new CPDF_Null()); + CPDF_Array* arr_val = new CPDF_Array; + arr_val->AddNumber(1); + arr_val->AddNumber(2); + arr->InsertAt(11, arr_val); + CPDF_Dictionary* dict_val = new CPDF_Dictionary; + dict_val->SetAt("key1", new CPDF_String("Linda", false)); + dict_val->SetAt("key2", new CPDF_String("Zoe", false)); + arr->InsertAt(12, dict_val); + CPDF_Dictionary* stream_dict = new CPDF_Dictionary; + stream_dict->SetAt("key1", new CPDF_String("John", false)); + stream_dict->SetAt("key2", new CPDF_String("King", false)); + uint8_t data[] = "A stream for test"; + // The data buffer will be owned by stream object, so it needs to be + // dynamically allocated. 
+ size_t buf_size = sizeof(data); + uint8_t* buf = reinterpret_cast<uint8_t*>(malloc(buf_size)); + memcpy(buf, data, buf_size); + CPDF_Stream* stream_val = new CPDF_Stream(buf, buf_size, stream_dict); + arr->InsertAt(13, stream_val); + const char* const expected_str[] = { + "true", "false", "0", "-1234", "2345", "0.05", "", + "It is a test!", "NAME", "test", "", "", "", ""}; + const char* const expected_cstr[] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + "It is a test!", "NAME", "test", nullptr, nullptr, nullptr, nullptr}; + const int expected_int[] = {1, 0, 0, -1234, 2345, 0, 0, + 0, 0, 0, 0, 0, 0, 0}; + const float expected_float[] = {0, 0, 0, -1234, 2345, 0.05f, 0, + 0, 0, 0, 0, 0, 0, 0}; + for (size_t i = 0; i < arr->GetCount(); ++i) { + EXPECT_STREQ(expected_str[i], arr->GetStringAt(i).c_str()); + EXPECT_STREQ(expected_cstr[i], arr->GetConstStringAt(i).GetCStr()); + EXPECT_EQ(expected_int[i], arr->GetIntegerAt(i)); + EXPECT_EQ(expected_float[i], arr->GetNumberAt(i)); + EXPECT_EQ(expected_float[i], arr->GetFloatAt(i)); + if (i == 11) + EXPECT_EQ(arr_val, arr->GetArrayAt(i)); + else + EXPECT_EQ(nullptr, arr->GetArrayAt(i)); + if (i == 13) { + EXPECT_EQ(stream_dict, arr->GetDictAt(i)); + EXPECT_EQ(stream_val, arr->GetStreamAt(i)); + } else { + EXPECT_EQ(nullptr, arr->GetStreamAt(i)); + if (i == 12) + EXPECT_EQ(dict_val, arr->GetDictAt(i)); + else + EXPECT_EQ(nullptr, arr->GetDictAt(i)); + } + } + } +} + +TEST(PDFArrayTest, AddNumber) { + float vals[] = {1.0f, -1.0f, 0, 0.456734f, + 12345.54321f, 0.5f, 1000, 0.000045f}; + ScopedArray arr(new CPDF_Array); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) + arr->AddNumber(vals[i]); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) { + EXPECT_EQ(CPDF_Object::NUMBER, arr->GetElement(i)->GetType()); + EXPECT_EQ(vals[i], arr->GetElement(i)->GetNumber()); + } +} + +TEST(PDFArrayTest, AddInteger) { + int vals[] = {0, 1, 934435456, 876, 10000, -1, -24354656, -100}; + ScopedArray arr(new 
CPDF_Array); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) + arr->AddInteger(vals[i]); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) { + EXPECT_EQ(CPDF_Object::NUMBER, arr->GetElement(i)->GetType()); + EXPECT_EQ(vals[i], arr->GetElement(i)->GetNumber()); + } +} + +TEST(PDFArrayTest, AddStringAndName) { + const char* vals[] = {"", "a", "ehjhRIOYTTFdfcdnv", "122323", + "$#%^&**", " ", "This is a test.\r\n"}; + ScopedArray string_array(new CPDF_Array); + ScopedArray name_array(new CPDF_Array); + for (size_t i = 0; i < FX_ArraySize(vals); ++i) { + string_array->AddString(vals[i]); + name_array->AddName(vals[i]); + } + for (size_t i = 0; i < FX_ArraySize(vals); ++i) { + EXPECT_EQ(CPDF_Object::STRING, string_array->GetElement(i)->GetType()); + EXPECT_STREQ(vals[i], string_array->GetElement(i)->GetString().c_str()); + EXPECT_EQ(CPDF_Object::NAME, name_array->GetElement(i)->GetType()); + EXPECT_STREQ(vals[i], name_array->GetElement(i)->GetString().c_str()); + } +} + +TEST(PDFArrayTest, AddReferenceAndGetElement) { + std::unique_ptr<CPDF_IndirectObjectHolder> holder( + new CPDF_IndirectObjectHolder(nullptr)); + CPDF_Boolean* boolean_obj = new CPDF_Boolean(true); + CPDF_Number* int_obj = new CPDF_Number(-1234); + CPDF_Number* float_obj = new CPDF_Number(2345.089f); + CPDF_String* str_obj = new CPDF_String("Adsfdsf 343434 %&&*\n", false); + CPDF_Name* name_obj = new CPDF_Name("Title:"); + CPDF_Null* null_obj = new CPDF_Null(); + CPDF_Object* indirect_objs[] = {boolean_obj, int_obj, float_obj, + str_obj, name_obj, null_obj}; + unsigned int obj_nums[] = {2, 4, 7, 2345, 799887, 1}; + ScopedArray arr(new CPDF_Array); + ScopedArray arr1(new CPDF_Array); + // Create two arrays of references by different AddReference() APIs. + for (size_t i = 0; i < FX_ArraySize(indirect_objs); ++i) { + // All the indirect objects inserted will be owned by holder. 
+ holder->InsertIndirectObject(obj_nums[i], indirect_objs[i]); + arr->AddReference(holder.get(), obj_nums[i]); + arr1->AddReference(holder.get(), indirect_objs[i]); + } + // Check indirect objects. + for (size_t i = 0; i < FX_ArraySize(obj_nums); ++i) + EXPECT_EQ(indirect_objs[i], holder->GetIndirectObject(obj_nums[i])); + // Check arrays. + EXPECT_EQ(arr->GetCount(), arr1->GetCount()); + for (size_t i = 0; i < arr->GetCount(); ++i) { + EXPECT_EQ(CPDF_Object::REFERENCE, arr->GetElement(i)->GetType()); + EXPECT_EQ(indirect_objs[i], arr->GetElement(i)->GetDirect()); + EXPECT_EQ(indirect_objs[i], arr->GetElementValue(i)); + EXPECT_EQ(CPDF_Object::REFERENCE, arr1->GetElement(i)->GetType()); + EXPECT_EQ(indirect_objs[i], arr1->GetElement(i)->GetDirect()); + EXPECT_EQ(indirect_objs[i], arr1->GetElementValue(i)); + } +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_parser.cpp b/core/fpdfapi/fpdf_parser/cpdf_parser.cpp new file mode 100644 index 0000000000..a39dbc031b --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_parser.cpp @@ -0,0 +1,1652 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_parser.h" + +#include <vector> + +#include "core/fpdfapi/fpdf_parser/cpdf_standard_security_handler.h" +#include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" +#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_document.h" +#include "core/include/fpdfapi/cpdf_number.h" +#include "core/include/fpdfapi/cpdf_reference.h" +#include "core/include/fpdfapi/cpdf_stream.h" +#include "core/include/fpdfapi/ipdf_crypto_handler.h" +#include "core/include/fxcrt/fx_ext.h" +#include "core/include/fxcrt/fx_safe_types.h" +#include "third_party/base/stl_util.h" + +namespace { + +// A limit on the size of the xref table. Theoretical limits are higher, but +// this may be large enough in practice. +const int32_t kMaxXRefSize = 1048576; + +// A limit on the maximum object number in the xref table. Theoretical limits +// are higher, but this may be large enough in practice. +const FX_DWORD kMaxObjectNumber = 1048576; + +FX_DWORD GetVarInt(const uint8_t* p, int32_t n) { + FX_DWORD result = 0; + for (int32_t i = 0; i < n; ++i) + result = result * 256 + p[i]; + return result; +} + +int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) { + return pObjStream->GetDict()->GetIntegerBy("N"); +} + +int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) { + return pObjStream->GetDict()->GetIntegerBy("First"); +} + +} // namespace + +CPDF_Parser::CPDF_Parser() + : m_pDocument(nullptr), + m_bOwnFileRead(true), + m_FileVersion(0), + m_pTrailer(nullptr), + m_pEncryptDict(nullptr), + m_bVersionUpdated(false), + m_pLinearized(nullptr), + m_dwFirstPageNo(0), + m_dwXrefStartObjNum(0) { + m_pSyntax.reset(new CPDF_SyntaxParser); +} + +CPDF_Parser::~CPDF_Parser() { + CloseParser(); +} + +FX_DWORD CPDF_Parser::GetLastObjNum() const { + return m_ObjectInfo.empty() ? 
0 : m_ObjectInfo.rbegin()->first; +} + +bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const { + return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; +} + +FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const { + auto it = m_ObjectInfo.find(objnum); + return it != m_ObjectInfo.end() ? it->second.pos : 0; +} + +uint8_t CPDF_Parser::GetObjectType(FX_DWORD objnum) const { + ASSERT(IsValidObjectNumber(objnum)); + auto it = m_ObjectInfo.find(objnum); + return it != m_ObjectInfo.end() ? it->second.type : 0; +} + +uint16_t CPDF_Parser::GetObjectGenNum(FX_DWORD objnum) const { + ASSERT(IsValidObjectNumber(objnum)); + auto it = m_ObjectInfo.find(objnum); + return it != m_ObjectInfo.end() ? it->second.gennum : 0; +} + +bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const { + uint8_t type = GetObjectType(objnum); + return type == 0 || type == 255; +} + +void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { + m_pEncryptDict = pDict; +} + +IPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() { + return m_pSyntax->m_pCryptoHandler.get(); +} + +IFX_FileRead* CPDF_Parser::GetFileAccess() const { + return m_pSyntax->m_pFileAccess; +} + +void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) { + if (objnum == 0) { + m_ObjectInfo.clear(); + return; + } + + auto it = m_ObjectInfo.lower_bound(objnum); + while (it != m_ObjectInfo.end()) { + auto saved_it = it++; + m_ObjectInfo.erase(saved_it); + } + + if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) + m_ObjectInfo[objnum - 1].pos = 0; +} + +void CPDF_Parser::CloseParser() { + m_bVersionUpdated = false; + delete m_pDocument; + m_pDocument = nullptr; + + if (m_pTrailer) { + m_pTrailer->Release(); + m_pTrailer = nullptr; + } + ReleaseEncryptHandler(); + SetEncryptDictionary(nullptr); + + if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) { + m_pSyntax->m_pFileAccess->Release(); + m_pSyntax->m_pFileAccess = nullptr; + } + + m_ObjectStreamMap.clear(); + m_ObjCache.clear(); + 
m_SortedOffset.clear(); + m_ObjectInfo.clear(); + + int32_t iLen = m_Trailers.GetSize(); + for (int32_t i = 0; i < iLen; ++i) { + if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i)) + trailer->Release(); + } + m_Trailers.RemoveAll(); + + if (m_pLinearized) { + m_pLinearized->Release(); + m_pLinearized = nullptr; + } +} + +CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) { + CloseParser(); + + m_bXRefStream = FALSE; + m_LastXRefOffset = 0; + m_bOwnFileRead = true; + + int32_t offset = GetHeaderOffset(pFileAccess); + if (offset == -1) { + if (pFileAccess) + pFileAccess->Release(); + return FORMAT_ERROR; + } + m_pSyntax->InitParser(pFileAccess, offset); + + uint8_t ch; + if (!m_pSyntax->GetCharAt(5, ch)) + return FORMAT_ERROR; + if (std::isdigit(ch)) + m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10; + + if (!m_pSyntax->GetCharAt(7, ch)) + return FORMAT_ERROR; + if (std::isdigit(ch)) + m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); + + if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9) + return FORMAT_ERROR; + + m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9); + m_pDocument = new CPDF_Document(this); + + FX_BOOL bXRefRebuilt = FALSE; + if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) { + m_SortedOffset.insert(m_pSyntax->SavePos()); + m_pSyntax->GetKeyword(); + + bool bNumber; + CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber); + if (!bNumber) + return FORMAT_ERROR; + + m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str); + if (!LoadAllCrossRefV4(m_LastXRefOffset) && + !LoadAllCrossRefV5(m_LastXRefOffset)) { + if (!RebuildCrossRef()) + return FORMAT_ERROR; + + bXRefRebuilt = TRUE; + m_LastXRefOffset = 0; + } + } else { + if (!RebuildCrossRef()) + return FORMAT_ERROR; + + bXRefRebuilt = TRUE; + } + Error eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + + m_pDocument->LoadDoc(); + if (!m_pDocument->GetRoot() || 
m_pDocument->GetPageCount() == 0) { + if (bXRefRebuilt) + return FORMAT_ERROR; + + ReleaseEncryptHandler(); + if (!RebuildCrossRef()) + return FORMAT_ERROR; + + eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + + m_pDocument->LoadDoc(); + if (!m_pDocument->GetRoot()) + return FORMAT_ERROR; + } + if (GetRootObjNum() == 0) { + ReleaseEncryptHandler(); + if (!RebuildCrossRef() || GetRootObjNum() == 0) + return FORMAT_ERROR; + + eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + } + if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { + CPDF_Reference* pMetadata = + ToReference(m_pDocument->GetRoot()->GetElement("Metadata")); + if (pMetadata) + m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); + } + return SUCCESS; +} +CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { + ReleaseEncryptHandler(); + SetEncryptDictionary(nullptr); + + if (!m_pTrailer) + return FORMAT_ERROR; + + CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt"); + if (pEncryptObj) { + if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { + SetEncryptDictionary(pEncryptDict); + } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { + pEncryptObj = m_pDocument->GetIndirectObject(pRef->GetRefObjNum()); + if (pEncryptObj) + SetEncryptDictionary(pEncryptObj->GetDict()); + } + } + + if (m_pEncryptDict) { + CFX_ByteString filter = m_pEncryptDict->GetStringBy("Filter"); + std::unique_ptr<IPDF_SecurityHandler> pSecurityHandler; + Error err = HANDLER_ERROR; + if (filter == "Standard") { + pSecurityHandler.reset(new CPDF_StandardSecurityHandler); + err = PASSWORD_ERROR; + } + if (!pSecurityHandler) + return HANDLER_ERROR; + + if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) + return err; + + m_pSecurityHandler = std::move(pSecurityHandler); + std::unique_ptr<IPDF_CryptoHandler> pCryptoHandler( + m_pSecurityHandler->CreateCryptoHandler()); + if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) + return 
HANDLER_ERROR; + m_pSyntax->SetEncrypt(std::move(pCryptoHandler)); + } + return SUCCESS; +} + +void CPDF_Parser::ReleaseEncryptHandler() { + m_pSyntax->m_pCryptoHandler.reset(); + m_pSecurityHandler.reset(); +} + +FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const { + if (!IsValidObjectNumber(objnum)) + return 0; + + if (GetObjectType(objnum) == 1) + return GetObjectPositionOrZero(objnum); + + if (GetObjectType(objnum) == 2) { + FX_FILESIZE pos = GetObjectPositionOrZero(objnum); + return GetObjectPositionOrZero(pos); + } + return 0; +} + +FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { + if (!LoadCrossRefV4(xrefpos, 0, TRUE)) + return FALSE; + + m_pTrailer = LoadTrailerV4(); + if (!m_pTrailer) + return FALSE; + + int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); + if (xrefsize > 0 && xrefsize <= kMaxXRefSize) + ShrinkObjectMap(xrefsize); + + std::vector<FX_FILESIZE> CrossRefList; + std::vector<FX_FILESIZE> XRefStreamList; + std::set<FX_FILESIZE> seen_xrefpos; + + CrossRefList.push_back(xrefpos); + XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); + seen_xrefpos.insert(xrefpos); + + // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not + // numerical, GetDirectInteger() returns 0. Loading will end. + xrefpos = GetDirectInteger(m_pTrailer, "Prev"); + while (xrefpos) { + // Check for circular references. + if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) + return FALSE; + + seen_xrefpos.insert(xrefpos); + + // SLOW ... + CrossRefList.insert(CrossRefList.begin(), xrefpos); + LoadCrossRefV4(xrefpos, 0, TRUE); + + std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( + LoadTrailerV4()); + if (!pDict) + return FALSE; + + xrefpos = GetDirectInteger(pDict.get(), "Prev"); + + // SLOW ... 
+ XRefStreamList.insert(XRefStreamList.begin(), + pDict->GetIntegerBy("XRefStm")); + m_Trailers.Add(pDict.release()); + } + + for (size_t i = 0; i < CrossRefList.size(); ++i) { + if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) + return FALSE; + } + return TRUE; +} + +FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, + FX_DWORD dwObjCount) { + if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) + return FALSE; + + m_pTrailer = LoadTrailerV4(); + if (!m_pTrailer) + return FALSE; + + int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); + if (xrefsize == 0) + return FALSE; + + std::vector<FX_FILESIZE> CrossRefList; + std::vector<FX_FILESIZE> XRefStreamList; + std::set<FX_FILESIZE> seen_xrefpos; + + CrossRefList.push_back(xrefpos); + XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); + seen_xrefpos.insert(xrefpos); + + xrefpos = GetDirectInteger(m_pTrailer, "Prev"); + while (xrefpos) { + // Check for circular references. + if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) + return FALSE; + + seen_xrefpos.insert(xrefpos); + + // SLOW ... + CrossRefList.insert(CrossRefList.begin(), xrefpos); + LoadCrossRefV4(xrefpos, 0, TRUE); + + std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( + LoadTrailerV4()); + if (!pDict) + return FALSE; + + xrefpos = GetDirectInteger(pDict.get(), "Prev"); + + // SLOW ... 
+ XRefStreamList.insert(XRefStreamList.begin(), + pDict->GetIntegerBy("XRefStm")); + m_Trailers.Add(pDict.release()); + } + + for (size_t i = 1; i < CrossRefList.size(); ++i) { + if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) + return FALSE; + } + return TRUE; +} + +FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, + FX_DWORD dwObjCount) { + FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset; + + m_pSyntax->RestorePos(dwStartPos); + m_SortedOffset.insert(pos); + + FX_DWORD start_objnum = 0; + FX_DWORD count = dwObjCount; + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + + const int32_t recordsize = 20; + std::vector<char> buf(1024 * recordsize + 1); + buf[1024 * recordsize] = '\0'; + + int32_t nBlocks = count / 1024 + 1; + for (int32_t block = 0; block < nBlocks; block++) { + int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; + FX_DWORD dwReadSize = block_size * recordsize; + if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen) + return FALSE; + + if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), + dwReadSize)) { + return FALSE; + } + + for (int32_t i = 0; i < block_size; i++) { + FX_DWORD objnum = start_objnum + block * 1024 + i; + char* pEntry = &buf[i * recordsize]; + if (pEntry[17] == 'f') { + m_ObjectInfo[objnum].pos = 0; + m_ObjectInfo[objnum].type = 0; + } else { + int32_t offset = FXSYS_atoi(pEntry); + if (offset == 0) { + for (int32_t c = 0; c < 10; c++) { + if (!std::isdigit(pEntry[c])) + return FALSE; + } + } + + m_ObjectInfo[objnum].pos = offset; + int32_t version = FXSYS_atoi(pEntry + 11); + if (version >= 1) + m_bVersionUpdated = true; + + m_ObjectInfo[objnum].gennum = version; + if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) + m_SortedOffset.insert(m_ObjectInfo[objnum].pos); + + m_ObjectInfo[objnum].type = 1; + } + } + } + m_pSyntax->RestorePos(SavedPos + count * recordsize); + return TRUE; +} + +bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, + FX_FILESIZE 
streampos, + FX_BOOL bSkip) { + m_pSyntax->RestorePos(pos); + if (m_pSyntax->GetKeyword() != "xref") + return false; + + m_SortedOffset.insert(pos); + if (streampos) + m_SortedOffset.insert(streampos); + + while (1) { + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + bool bIsNumber; + CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); + if (word.IsEmpty()) + return false; + + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + break; + } + + FX_DWORD start_objnum = FXSYS_atoui(word); + if (start_objnum >= kMaxObjectNumber) + return false; + + FX_DWORD count = m_pSyntax->GetDirectNum(); + m_pSyntax->ToNextWord(); + SavedPos = m_pSyntax->SavePos(); + const int32_t recordsize = 20; + + m_dwXrefStartObjNum = start_objnum; + if (!bSkip) { + std::vector<char> buf(1024 * recordsize + 1); + buf[1024 * recordsize] = '\0'; + + int32_t nBlocks = count / 1024 + 1; + for (int32_t block = 0; block < nBlocks; block++) { + int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; + m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), + block_size * recordsize); + + for (int32_t i = 0; i < block_size; i++) { + FX_DWORD objnum = start_objnum + block * 1024 + i; + char* pEntry = &buf[i * recordsize]; + if (pEntry[17] == 'f') { + m_ObjectInfo[objnum].pos = 0; + m_ObjectInfo[objnum].type = 0; + } else { + FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry); + if (offset == 0) { + for (int32_t c = 0; c < 10; c++) { + if (!std::isdigit(pEntry[c])) + return false; + } + } + + m_ObjectInfo[objnum].pos = offset; + int32_t version = FXSYS_atoi(pEntry + 11); + if (version >= 1) + m_bVersionUpdated = true; + + m_ObjectInfo[objnum].gennum = version; + if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) + m_SortedOffset.insert(m_ObjectInfo[objnum].pos); + + m_ObjectInfo[objnum].type = 1; + } + } + } + } + m_pSyntax->RestorePos(SavedPos + count * recordsize); + } + return !streampos || LoadCrossRefV5(&streampos, FALSE); +} + +FX_BOOL 
CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { + if (!LoadCrossRefV5(&xrefpos, TRUE)) + return FALSE; + + std::set<FX_FILESIZE> seen_xrefpos; + while (xrefpos) { + seen_xrefpos.insert(xrefpos); + if (!LoadCrossRefV5(&xrefpos, FALSE)) + return FALSE; + + // Check for circular references. + if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) + return FALSE; + } + m_ObjectStreamMap.clear(); + m_bXRefStream = TRUE; + return TRUE; +} + +FX_BOOL CPDF_Parser::RebuildCrossRef() { + m_ObjectInfo.clear(); + m_SortedOffset.clear(); + if (m_pTrailer) { + m_pTrailer->Release(); + m_pTrailer = nullptr; + } + + ParserState state = ParserState::kDefault; + + int32_t inside_index = 0; + FX_DWORD objnum = 0; + FX_DWORD gennum = 0; + int32_t depth = 0; + + const FX_DWORD kBufferSize = 4096; + std::vector<uint8_t> buffer(kBufferSize); + + FX_FILESIZE pos = m_pSyntax->m_HeaderOffset; + FX_FILESIZE start_pos = 0; + FX_FILESIZE start_pos1 = 0; + FX_FILESIZE last_obj = -1; + FX_FILESIZE last_xref = -1; + FX_FILESIZE last_trailer = -1; + + while (pos < m_pSyntax->m_FileLen) { + const FX_FILESIZE saved_pos = pos; + bool bOverFlow = false; + FX_DWORD size = + std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize); + if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size)) + break; + + for (FX_DWORD i = 0; i < size; i++) { + uint8_t byte = buffer[i]; + switch (state) { + case ParserState::kDefault: + if (PDFCharIsWhitespace(byte)) { + state = ParserState::kWhitespace; + } else if (std::isdigit(byte)) { + --i; + state = ParserState::kWhitespace; + } else if (byte == '%') { + inside_index = 0; + state = ParserState::kComment; + } else if (byte == '(') { + state = ParserState::kString; + depth = 1; + } else if (byte == '<') { + inside_index = 1; + state = ParserState::kHexString; + } else if (byte == '\\') { + state = ParserState::kEscapedString; + } else if (byte == 't') { + state = ParserState::kTrailer; + inside_index = 1; + } + break; + + case ParserState::kWhitespace: + 
if (std::isdigit(byte)) { + start_pos = pos + i; + state = ParserState::kObjNum; + objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); + } else if (byte == 't') { + state = ParserState::kTrailer; + inside_index = 1; + } else if (byte == 'x') { + state = ParserState::kXref; + inside_index = 1; + } else if (!PDFCharIsWhitespace(byte)) { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kObjNum: + if (std::isdigit(byte)) { + objnum = + objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); + } else if (PDFCharIsWhitespace(byte)) { + state = ParserState::kPostObjNum; + } else { + --i; + state = ParserState::kEndObj; + inside_index = 0; + } + break; + + case ParserState::kPostObjNum: + if (std::isdigit(byte)) { + start_pos1 = pos + i; + state = ParserState::kGenNum; + gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); + } else if (byte == 't') { + state = ParserState::kTrailer; + inside_index = 1; + } else if (!PDFCharIsWhitespace(byte)) { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kGenNum: + if (std::isdigit(byte)) { + gennum = + gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); + } else if (PDFCharIsWhitespace(byte)) { + state = ParserState::kPostGenNum; + } else { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kPostGenNum: + if (byte == 'o') { + state = ParserState::kBeginObj; + inside_index = 1; + } else if (std::isdigit(byte)) { + objnum = gennum; + gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); + start_pos = start_pos1; + start_pos1 = pos + i; + state = ParserState::kGenNum; + } else if (byte == 't') { + state = ParserState::kTrailer; + inside_index = 1; + } else if (!PDFCharIsWhitespace(byte)) { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kBeginObj: + switch (inside_index) { + case 1: + if (byte != 'b') { + --i; + state = ParserState::kDefault; + } else { + inside_index++; + } + break; + case 2: 
+ if (byte != 'j') { + --i; + state = ParserState::kDefault; + } else { + inside_index++; + } + break; + case 3: + if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { + FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset; + m_SortedOffset.insert(obj_pos); + last_obj = start_pos; + FX_FILESIZE obj_end = 0; + CPDF_Object* pObject = ParseIndirectObjectAtByStrict( + m_pDocument, obj_pos, objnum, &obj_end); + if (CPDF_Stream* pStream = ToStream(pObject)) { + if (CPDF_Dictionary* pDict = pStream->GetDict()) { + if ((pDict->KeyExist("Type")) && + (pDict->GetStringBy("Type") == "XRef" && + pDict->KeyExist("Size"))) { + CPDF_Object* pRoot = pDict->GetElement("Root"); + if (pRoot && pRoot->GetDict() && + pRoot->GetDict()->GetElement("Pages")) { + if (m_pTrailer) + m_pTrailer->Release(); + m_pTrailer = ToDictionary(pDict->Clone()); + } + } + } + } + + FX_FILESIZE offset = 0; + m_pSyntax->RestorePos(obj_pos); + offset = m_pSyntax->FindTag("obj", 0); + if (offset == -1) + offset = 0; + else + offset += 3; + + FX_FILESIZE nLen = obj_end - obj_pos - offset; + if ((FX_DWORD)nLen > size - i) { + pos = obj_end + m_pSyntax->m_HeaderOffset; + bOverFlow = true; + } else { + i += (FX_DWORD)nLen; + } + + if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && + m_ObjectInfo[objnum].pos) { + if (pObject) { + FX_DWORD oldgen = GetObjectGenNum(objnum); + m_ObjectInfo[objnum].pos = obj_pos; + m_ObjectInfo[objnum].gennum = gennum; + if (oldgen != gennum) + m_bVersionUpdated = true; + } + } else { + m_ObjectInfo[objnum].pos = obj_pos; + m_ObjectInfo[objnum].type = 1; + m_ObjectInfo[objnum].gennum = gennum; + } + + if (pObject) + pObject->Release(); + } + --i; + state = ParserState::kDefault; + break; + } + break; + + case ParserState::kTrailer: + if (inside_index == 7) { + if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { + last_trailer = pos + i - 7; + m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset); + + CPDF_Object* pObj = 
m_pSyntax->GetObject(m_pDocument, 0, 0, true); + if (pObj) { + if (!pObj->IsDictionary() && !pObj->AsStream()) { + pObj->Release(); + } else { + CPDF_Stream* pStream = pObj->AsStream(); + if (CPDF_Dictionary* pTrailer = + pStream ? pStream->GetDict() : pObj->AsDictionary()) { + if (m_pTrailer) { + CPDF_Object* pRoot = pTrailer->GetElement("Root"); + CPDF_Reference* pRef = ToReference(pRoot); + if (!pRoot || + (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) && + m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) { + auto it = pTrailer->begin(); + while (it != pTrailer->end()) { + const CFX_ByteString& key = it->first; + CPDF_Object* pElement = it->second; + ++it; + FX_DWORD dwObjNum = + pElement ? pElement->GetObjNum() : 0; + if (dwObjNum) { + m_pTrailer->SetAtReference(key, m_pDocument, + dwObjNum); + } else { + m_pTrailer->SetAt(key, pElement->Clone()); + } + } + } + pObj->Release(); + } else { + if (pObj->IsStream()) { + m_pTrailer = ToDictionary(pTrailer->Clone()); + pObj->Release(); + } else { + m_pTrailer = pTrailer; + } + + FX_FILESIZE dwSavePos = m_pSyntax->SavePos(); + CFX_ByteString strWord = m_pSyntax->GetKeyword(); + if (!strWord.Compare("startxref")) { + bool bNumber; + CFX_ByteString bsOffset = + m_pSyntax->GetNextWord(&bNumber); + if (bNumber) + m_LastXRefOffset = FXSYS_atoi(bsOffset); + } + m_pSyntax->RestorePos(dwSavePos); + } + } else { + pObj->Release(); + } + } + } + } + --i; + state = ParserState::kDefault; + } else if (byte == "trailer"[inside_index]) { + inside_index++; + } else { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kXref: + if (inside_index == 4) { + last_xref = pos + i - 4; + state = ParserState::kWhitespace; + } else if (byte == "xref"[inside_index]) { + inside_index++; + } else { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kComment: + if (PDFCharIsLineEnding(byte)) + state = ParserState::kDefault; + break; + + case ParserState::kString: + if (byte == ')') { + if (depth > 
0) + depth--; + } else if (byte == '(') { + depth++; + } + + if (!depth) + state = ParserState::kDefault; + break; + + case ParserState::kHexString: + if (byte == '>' || (byte == '<' && inside_index == 1)) + state = ParserState::kDefault; + inside_index = 0; + break; + + case ParserState::kEscapedString: + if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kEndObj: + if (PDFCharIsWhitespace(byte)) { + state = ParserState::kDefault; + } else if (byte == '%' || byte == '(' || byte == '<' || + byte == '\\') { + state = ParserState::kDefault; + --i; + } else if (inside_index == 6) { + state = ParserState::kDefault; + --i; + } else if (byte == "endobj"[inside_index]) { + inside_index++; + } + break; + } + + if (bOverFlow) { + size = 0; + break; + } + } + pos += size; + + // If the position has not changed at all in a loop iteration, then break + // out to prevent infinite looping. + if (pos == saved_pos) + break; + } + + if (last_xref != -1 && last_xref > last_obj) + last_trailer = last_xref; + else if (last_trailer == -1 || last_xref < last_obj) + last_trailer = m_pSyntax->m_FileLen; + + m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset); + return m_pTrailer && !m_ObjectInfo.empty(); +} + +FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) { + CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0); + if (!pObject) + return FALSE; + + if (m_pDocument) { + FX_BOOL bInserted = FALSE; + CPDF_Dictionary* pDict = m_pDocument->GetRoot(); + if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) { + bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject); + } else { + if (pObject->IsStream()) + pObject->Release(); + } + + if (!bInserted) + return FALSE; + } + + CPDF_Stream* pStream = pObject->AsStream(); + if (!pStream) + return FALSE; + + *pos = pStream->GetDict()->GetIntegerBy("Prev"); + int32_t size = 
pStream->GetDict()->GetIntegerBy("Size"); + if (size < 0) { + pStream->Release(); + return FALSE; + } + + if (bMainXRef) { + m_pTrailer = ToDictionary(pStream->GetDict()->Clone()); + ShrinkObjectMap(size); + for (auto& it : m_ObjectInfo) + it.second.type = 0; + } else { + m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone())); + } + + std::vector<std::pair<int32_t, int32_t>> arrIndex; + CPDF_Array* pArray = pStream->GetDict()->GetArrayBy("Index"); + if (pArray) { + FX_DWORD nPairSize = pArray->GetCount() / 2; + for (FX_DWORD i = 0; i < nPairSize; i++) { + CPDF_Object* pStartNumObj = pArray->GetElement(i * 2); + CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1); + + if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) { + int nStartNum = pStartNumObj->GetInteger(); + int nCount = pCountObj->GetInteger(); + if (nStartNum >= 0 && nCount > 0) + arrIndex.push_back(std::make_pair(nStartNum, nCount)); + } + } + } + + if (arrIndex.size() == 0) + arrIndex.push_back(std::make_pair(0, size)); + + pArray = pStream->GetDict()->GetArrayBy("W"); + if (!pArray) { + pStream->Release(); + return FALSE; + } + + CFX_DWordArray WidthArray; + FX_SAFE_DWORD dwAccWidth = 0; + for (FX_DWORD i = 0; i < pArray->GetCount(); i++) { + WidthArray.Add(pArray->GetIntegerAt(i)); + dwAccWidth += WidthArray[i]; + } + + if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) { + pStream->Release(); + return FALSE; + } + + FX_DWORD totalWidth = dwAccWidth.ValueOrDie(); + CPDF_StreamAcc acc; + acc.LoadAllData(pStream); + + const uint8_t* pData = acc.GetData(); + FX_DWORD dwTotalSize = acc.GetSize(); + FX_DWORD segindex = 0; + for (FX_DWORD i = 0; i < arrIndex.size(); i++) { + int32_t startnum = arrIndex[i].first; + if (startnum < 0) + continue; + + m_dwXrefStartObjNum = + pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum); + FX_DWORD count = + pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second); + FX_SAFE_DWORD dwCaculatedSize = segindex; + dwCaculatedSize += count; + 
dwCaculatedSize *= totalWidth; + if (!dwCaculatedSize.IsValid() || + dwCaculatedSize.ValueOrDie() > dwTotalSize) { + continue; + } + + const uint8_t* segstart = pData + segindex * totalWidth; + FX_SAFE_DWORD dwMaxObjNum = startnum; + dwMaxObjNum += count; + FX_DWORD dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; + if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) + continue; + + for (FX_DWORD j = 0; j < count; j++) { + int32_t type = 1; + const uint8_t* entrystart = segstart + j * totalWidth; + if (WidthArray[0]) + type = GetVarInt(entrystart, WidthArray[0]); + + if (GetObjectType(startnum + j) == 255) { + FX_FILESIZE offset = + GetVarInt(entrystart + WidthArray[0], WidthArray[1]); + m_ObjectInfo[startnum + j].pos = offset; + m_SortedOffset.insert(offset); + continue; + } + + if (GetObjectType(startnum + j)) + continue; + + m_ObjectInfo[startnum + j].type = type; + if (type == 0) { + m_ObjectInfo[startnum + j].pos = 0; + } else { + FX_FILESIZE offset = + GetVarInt(entrystart + WidthArray[0], WidthArray[1]); + m_ObjectInfo[startnum + j].pos = offset; + if (type == 1) { + m_SortedOffset.insert(offset); + } else { + if (offset < 0 || !IsValidObjectNumber(offset)) { + pStream->Release(); + return FALSE; + } + m_ObjectInfo[offset].type = 255; + } + } + } + segindex += count; + } + pStream->Release(); + return TRUE; +} + +CPDF_Array* CPDF_Parser::GetIDArray() { + CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : nullptr; + if (!pID) + return nullptr; + + if (CPDF_Reference* pRef = pID->AsReference()) { + pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum()); + m_pTrailer->SetAt("ID", pID); + } + return ToArray(pID); +} + +FX_DWORD CPDF_Parser::GetRootObjNum() { + CPDF_Reference* pRef = + ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr); + return pRef ? pRef->GetRefObjNum() : 0; +} + +FX_DWORD CPDF_Parser::GetInfoObjNum() { + CPDF_Reference* pRef = + ToReference(m_pTrailer ? 
m_pTrailer->GetElement("Info") : nullptr); + return pRef ? pRef->GetRefObjNum() : 0; +} + +FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) { + bForm = FALSE; + if (!IsValidObjectNumber(objnum)) + return TRUE; + + if (GetObjectType(objnum) == 0) + return TRUE; + + if (GetObjectType(objnum) == 2) + return TRUE; + + FX_FILESIZE pos = m_ObjectInfo[objnum].pos; + auto it = m_SortedOffset.find(pos); + if (it == m_SortedOffset.end()) + return TRUE; + + if (++it == m_SortedOffset.end()) + return FALSE; + + FX_FILESIZE size = *it - pos; + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + m_pSyntax->RestorePos(pos); + + const char kFormStream[] = "/Form\0stream"; + const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1); + bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0; + m_pSyntax->RestorePos(SavedPos); + return TRUE; +} + +CPDF_Object* CPDF_Parser::ParseIndirectObject( + CPDF_IndirectObjectHolder* pObjList, + FX_DWORD objnum) { + if (!IsValidObjectNumber(objnum)) + return nullptr; + + // Prevent circular parsing the same object. + if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) + return nullptr; + + pdfium::ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum); + if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) { + FX_FILESIZE pos = m_ObjectInfo[objnum].pos; + if (pos <= 0) + return nullptr; + return ParseIndirectObjectAt(pObjList, pos, objnum); + } + if (GetObjectType(objnum) != 2) + return nullptr; + + CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); + if (!pObjStream) + return nullptr; + + ScopedFileStream file(FX_CreateMemoryStream( + (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE)); + CPDF_SyntaxParser syntax; + syntax.InitParser(file.get(), 0); + const int32_t offset = GetStreamFirst(pObjStream); + + // Read object numbers from |pObjStream| into a cache. 
+ if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) { + for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { + FX_DWORD thisnum = syntax.GetDirectNum(); + FX_DWORD thisoff = syntax.GetDirectNum(); + m_ObjCache[pObjStream][thisnum] = thisoff; + } + } + + const auto it = m_ObjCache[pObjStream].find(objnum); + if (it == m_ObjCache[pObjStream].end()) + return nullptr; + + syntax.RestorePos(offset + it->second); + return syntax.GetObject(pObjList, 0, 0, true); +} + +CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) { + auto it = m_ObjectStreamMap.find(objnum); + if (it != m_ObjectStreamMap.end()) + return it->second.get(); + + if (!m_pDocument) + return nullptr; + + const CPDF_Stream* pStream = ToStream(m_pDocument->GetIndirectObject(objnum)); + if (!pStream) + return nullptr; + + CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc; + pStreamAcc->LoadAllData(pStream); + m_ObjectStreamMap[objnum].reset(pStreamAcc); + return pStreamAcc; +} + +FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const { + if (!IsValidObjectNumber(objnum)) + return 0; + + if (GetObjectType(objnum) == 2) + objnum = GetObjectPositionOrZero(objnum); + + if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255) + return 0; + + FX_FILESIZE offset = GetObjectPositionOrZero(objnum); + if (offset == 0) + return 0; + + auto it = m_SortedOffset.find(offset); + if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) + return 0; + + return *it - offset; +} + +void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, + uint8_t*& pBuffer, + FX_DWORD& size) { + pBuffer = nullptr; + size = 0; + if (!IsValidObjectNumber(objnum)) + return; + + if (GetObjectType(objnum) == 2) { + CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); + if (!pObjStream) + return; + + int32_t offset = GetStreamFirst(pObjStream); + const uint8_t* pData = pObjStream->GetData(); + FX_DWORD totalsize = pObjStream->GetSize(); + ScopedFileStream file( + 
FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE)); + + CPDF_SyntaxParser syntax; + syntax.InitParser(file.get(), 0); + for (int i = GetStreamNCount(pObjStream); i > 0; --i) { + FX_DWORD thisnum = syntax.GetDirectNum(); + FX_DWORD thisoff = syntax.GetDirectNum(); + if (thisnum != objnum) + continue; + + if (i == 1) { + size = totalsize - (thisoff + offset); + } else { + syntax.GetDirectNum(); // Skip nextnum. + FX_DWORD nextoff = syntax.GetDirectNum(); + size = nextoff - thisoff; + } + + pBuffer = FX_Alloc(uint8_t, size); + FXSYS_memcpy(pBuffer, pData + thisoff + offset, size); + return; + } + return; + } + + if (GetObjectType(objnum) != 1) + return; + + FX_FILESIZE pos = m_ObjectInfo[objnum].pos; + if (pos == 0) + return; + + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + m_pSyntax->RestorePos(pos); + + bool bIsNumber; + CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return; + } + + FX_DWORD parser_objnum = FXSYS_atoui(word); + if (parser_objnum && parser_objnum != objnum) { + m_pSyntax->RestorePos(SavedPos); + return; + } + + word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return; + } + + if (m_pSyntax->GetKeyword() != "obj") { + m_pSyntax->RestorePos(SavedPos); + return; + } + + auto it = m_SortedOffset.find(pos); + if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) { + m_pSyntax->RestorePos(SavedPos); + return; + } + + FX_FILESIZE nextoff = *it; + FX_BOOL bNextOffValid = FALSE; + if (nextoff != pos) { + m_pSyntax->RestorePos(nextoff); + word = m_pSyntax->GetNextWord(&bIsNumber); + if (word == "xref") { + bNextOffValid = TRUE; + } else if (bIsNumber) { + word = m_pSyntax->GetNextWord(&bIsNumber); + if (bIsNumber && m_pSyntax->GetKeyword() == "obj") { + bNextOffValid = TRUE; + } + } + } + + if (!bNextOffValid) { + m_pSyntax->RestorePos(pos); + while (1) { + if (m_pSyntax->GetKeyword() == "endobj") + break; + + if 
(m_pSyntax->SavePos() == m_pSyntax->m_FileLen) + break; + } + nextoff = m_pSyntax->SavePos(); + } + + size = (FX_DWORD)(nextoff - pos); + pBuffer = FX_Alloc(uint8_t, size); + m_pSyntax->RestorePos(pos); + m_pSyntax->ReadBlock(pBuffer, size); + m_pSyntax->RestorePos(SavedPos); +} + +CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( + CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + FX_DWORD objnum) { + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + m_pSyntax->RestorePos(pos); + bool bIsNumber; + CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + FX_FILESIZE objOffset = m_pSyntax->SavePos(); + objOffset -= word.GetLength(); + FX_DWORD parser_objnum = FXSYS_atoui(word); + if (objnum && parser_objnum != objnum) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + FX_DWORD parser_gennum = FXSYS_atoui(word); + if (m_pSyntax->GetKeyword() != "obj") { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + CPDF_Object* pObj = + m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true); + m_pSyntax->SavePos(); + + CFX_ByteString bsWord = m_pSyntax->GetKeyword(); + if (bsWord == "endobj") + m_pSyntax->SavePos(); + + m_pSyntax->RestorePos(SavedPos); + if (pObj) { + if (!objnum) + pObj->m_ObjNum = parser_objnum; + pObj->m_GenNum = parser_gennum; + } + return pObj; +} + +CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( + CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + FX_DWORD objnum, + FX_FILESIZE* pResultPos) { + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + m_pSyntax->RestorePos(pos); + + bool bIsNumber; + CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + FX_DWORD parser_objnum = FXSYS_atoui(word); + if (objnum && parser_objnum != 
objnum) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + FX_DWORD gennum = FXSYS_atoui(word); + if (m_pSyntax->GetKeyword() != "obj") { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum); + if (pResultPos) + *pResultPos = m_pSyntax->m_Pos; + + m_pSyntax->RestorePos(SavedPos); + return pObj; +} + +CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { + if (m_pSyntax->GetKeyword() != "trailer") + return nullptr; + + std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj( + m_pSyntax->GetObject(m_pDocument, 0, 0, true)); + if (!ToDictionary(pObj.get())) + return nullptr; + return pObj.release()->AsDictionary(); +} + +FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) { + if (!m_pSecurityHandler) + return (FX_DWORD)-1; + + FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions(); + if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") { + dwPermission &= 0xFFFFFFFC; + dwPermission |= 0xFFFFF0C0; + if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2) + dwPermission &= 0xFFFFF0FF; + } + return dwPermission; +} + +FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, + FX_DWORD offset) { + m_pSyntax->InitParser(pFileAccess, offset); + m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9); + + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + bool bIsNumber; + CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) + return FALSE; + + FX_DWORD objnum = FXSYS_atoui(word); + word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) + return FALSE; + + FX_DWORD gennum = FXSYS_atoui(word); + if (m_pSyntax->GetKeyword() != "obj") { + m_pSyntax->RestorePos(SavedPos); + return FALSE; + } + + m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true); + if (!m_pLinearized) + 
return FALSE; + + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + if (pDict && pDict->GetElement("Linearized")) { + m_pSyntax->GetNextWord(nullptr); + + CPDF_Object* pLen = pDict->GetElement("L"); + if (!pLen) { + m_pLinearized->Release(); + m_pLinearized = nullptr; + return FALSE; + } + + if (pLen->GetInteger() != (int)pFileAccess->GetSize()) + return FALSE; + + if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P"))) + m_dwFirstPageNo = pNo->GetInteger(); + + if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T"))) + m_LastXRefOffset = pTable->GetInteger(); + + return TRUE; + } + m_pLinearized->Release(); + m_pLinearized = nullptr; + return FALSE; +} + +CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) { + CloseParser(); + m_bXRefStream = FALSE; + m_LastXRefOffset = 0; + m_bOwnFileRead = true; + + int32_t offset = GetHeaderOffset(pFileAccess); + if (offset == -1) + return FORMAT_ERROR; + + if (!IsLinearizedFile(pFileAccess, offset)) { + m_pSyntax->m_pFileAccess = nullptr; + return StartParse(pFileAccess); + } + + m_pDocument = new CPDF_Document(this); + FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos(); + + FX_BOOL bXRefRebuilt = FALSE; + FX_BOOL bLoadV4 = FALSE; + if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) && + !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) { + if (!RebuildCrossRef()) + return FORMAT_ERROR; + + bXRefRebuilt = TRUE; + m_LastXRefOffset = 0; + } + + if (bLoadV4) { + m_pTrailer = LoadTrailerV4(); + if (!m_pTrailer) + return SUCCESS; + + int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); + if (xrefsize > 0) + ShrinkObjectMap(xrefsize); + } + + Error eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + + m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); + if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { + if (bXRefRebuilt) + return FORMAT_ERROR; + + ReleaseEncryptHandler(); + if (!RebuildCrossRef()) + return FORMAT_ERROR; + + eRet = SetEncryptHandler(); + if 
(eRet != SUCCESS) + return eRet; + + m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); + if (!m_pDocument->GetRoot()) + return FORMAT_ERROR; + } + + if (GetRootObjNum() == 0) { + ReleaseEncryptHandler(); + if (!RebuildCrossRef() || GetRootObjNum() == 0) + return FORMAT_ERROR; + + eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + } + + if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { + if (CPDF_Reference* pMetadata = + ToReference(m_pDocument->GetRoot()->GetElement("Metadata"))) + m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); + } + return SUCCESS; +} + +FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { + if (!LoadCrossRefV5(&xrefpos, FALSE)) + return FALSE; + + std::set<FX_FILESIZE> seen_xrefpos; + while (xrefpos) { + seen_xrefpos.insert(xrefpos); + if (!LoadCrossRefV5(&xrefpos, FALSE)) + return FALSE; + + // Check for circular references. + if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) + return FALSE; + } + m_ObjectStreamMap.clear(); + m_bXRefStream = TRUE; + return TRUE; +} + +CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { + FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum; + m_pSyntax->m_MetadataObjnum = 0; + if (m_pTrailer) { + m_pTrailer->Release(); + m_pTrailer = nullptr; + } + + m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset); + uint8_t ch = 0; + FX_DWORD dwCount = 0; + m_pSyntax->GetNextChar(ch); + while (PDFCharIsWhitespace(ch)) { + ++dwCount; + if (m_pSyntax->m_FileLen >= + (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) { + break; + } + m_pSyntax->GetNextChar(ch); + } + m_LastXRefOffset += dwCount; + m_ObjectStreamMap.clear(); + m_ObjCache.clear(); + + if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && + !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { + m_LastXRefOffset = 0; + m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; + return FORMAT_ERROR; + } + + m_pSyntax->m_MetadataObjnum = 
dwSaveMetadataObjnum; + return SUCCESS; +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp b/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp new file mode 100644 index 0000000000..f427ec5d81 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp @@ -0,0 +1,40 @@ +// Copyright 2015 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "testing/embedder_test.h" +#include "testing/gtest/include/gtest/gtest.h" + +class CPDFParserEmbeddertest : public EmbedderTest {}; + +TEST_F(CPDFParserEmbeddertest, LoadError_454695) { + // Test a dictionary with hex string instead of correct content. + // Verify that the defective pdf shouldn't be opened correctly. + EXPECT_FALSE(OpenDocument("bug_454695.pdf")); +} + +TEST_F(CPDFParserEmbeddertest, Bug_481363) { + // Test colorspace object with malformed dictionary. + EXPECT_TRUE(OpenDocument("bug_481363.pdf")); + FPDF_PAGE page = LoadPage(0); + EXPECT_NE(nullptr, page); + UnloadPage(page); +} + +TEST_F(CPDFParserEmbeddertest, Bug_544880) { + // Test self referencing /Pages object. + EXPECT_TRUE(OpenDocument("bug_544880.pdf")); + // Shouldn't crash. We don't check the return value here because we get the + // the count from the "/Count 1" in the testcase (at the time of writing) + // rather than the actual count (0). 
+ (void)GetPageCount(); +} + +TEST_F(CPDFParserEmbeddertest, Feature_Linearized_Loading) { + EXPECT_TRUE(OpenDocument("feature_linearized_loading.pdf", true)); +} + +TEST_F(CPDFParserEmbeddertest, Bug_325) { + EXPECT_FALSE(OpenDocument("bug_325_a.pdf")); + EXPECT_FALSE(OpenDocument("bug_325_b.pdf")); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_parser_unittest.cpp b/core/fpdfapi/fpdf_parser/cpdf_parser_unittest.cpp new file mode 100644 index 0000000000..2d1dcd4b5c --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_parser_unittest.cpp @@ -0,0 +1,202 @@ +// Copyright 2015 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <limits> +#include <string> + +#include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" +#include "core/include/fpdfapi/cpdf_parser.h" +#include "core/include/fxcrt/fx_ext.h" +#include "core/include/fxcrt/fx_stream.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/utils/path_service.h" + +// Provide a way to read test data from a buffer instead of a file. +class CFX_TestBufferRead : public IFX_FileRead { + public: + CFX_TestBufferRead(const unsigned char* buffer_in, size_t buf_size) + : buffer_(buffer_in), total_size_(buf_size) {} + + // IFX_Stream + void Release() override { delete this; } + + // IFX_FileRead + FX_BOOL ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override { + if (offset < 0 || offset + size > total_size_) { + return FALSE; + } + + memcpy(buffer, buffer_ + offset, size); + return TRUE; + } + FX_FILESIZE GetSize() override { return (FX_FILESIZE)total_size_; }; + + protected: + const unsigned char* buffer_; + size_t total_size_; +}; + +// A wrapper class to help test member functions of CPDF_Parser. +class CPDF_TestParser : public CPDF_Parser { + public: + CPDF_TestParser() {} + ~CPDF_TestParser() {} + + // Setup reading from a file and initial states. 
+ bool InitTestFromFile(const FX_CHAR* path) { + IFX_FileRead* pFileAccess = FX_CreateFileRead(path); + if (!pFileAccess) + return false; + + // For the test file, the header is set at the beginning. + m_pSyntax->InitParser(pFileAccess, 0); + return true; + } + + // Setup reading from a buffer and initial states. + bool InitTestFromBuffer(const unsigned char* buffer, size_t len) { + CFX_TestBufferRead* buffer_reader = new CFX_TestBufferRead(buffer, len); + + // For the test file, the header is set at the beginning. + m_pSyntax->InitParser(buffer_reader, 0); + return true; + } + + private: + // Add test cases here as private friend so that protected members in + // CPDF_Parser can be accessed by test cases. + // Need to access RebuildCrossRef. + FRIEND_TEST(cpdf_parser, RebuildCrossRefCorrectly); + FRIEND_TEST(cpdf_parser, RebuildCrossRefFailed); + // Need to access LoadCrossRefV4. + FRIEND_TEST(cpdf_parser, LoadCrossRefV4); +}; + +TEST(cpdf_parser, RebuildCrossRefCorrectly) { + CPDF_TestParser parser; + std::string test_file; + ASSERT_TRUE(PathService::GetTestFilePath("parser_rebuildxref_correct.pdf", + &test_file)); + ASSERT_TRUE(parser.InitTestFromFile(test_file.c_str())) << test_file; + + ASSERT_TRUE(parser.RebuildCrossRef()); + const FX_FILESIZE offsets[] = {0, 15, 61, 154, 296, 374, 450}; + const FX_WORD versions[] = {0, 0, 2, 4, 6, 8, 0}; + for (size_t i = 0; i < FX_ArraySize(offsets); ++i) + EXPECT_EQ(offsets[i], parser.m_ObjectInfo[i].pos); + for (size_t i = 0; i < FX_ArraySize(versions); ++i) + EXPECT_EQ(versions[i], parser.m_ObjectInfo[i].gennum); +} + +TEST(cpdf_parser, RebuildCrossRefFailed) { + CPDF_TestParser parser; + std::string test_file; + ASSERT_TRUE(PathService::GetTestFilePath( + "parser_rebuildxref_error_notrailer.pdf", &test_file)); + ASSERT_TRUE(parser.InitTestFromFile(test_file.c_str())) << test_file; + + ASSERT_FALSE(parser.RebuildCrossRef()); +} + +TEST(cpdf_parser, LoadCrossRefV4) { + { + const unsigned char xref_table[] = + "xref \n" + 
"0 6 \n" + "0000000003 65535 f \n" + "0000000017 00000 n \n" + "0000000081 00000 n \n" + "0000000000 00007 f \n" + "0000000331 00000 n \n" + "0000000409 00000 n \n" + "trail"; // Needed to end cross ref table reading. + CPDF_TestParser parser; + ASSERT_TRUE( + parser.InitTestFromBuffer(xref_table, FX_ArraySize(xref_table))); + + ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE)); + const FX_FILESIZE offsets[] = {0, 17, 81, 0, 331, 409}; + const uint8_t types[] = {0, 1, 1, 0, 1, 1}; + for (size_t i = 0; i < FX_ArraySize(offsets); ++i) { + EXPECT_EQ(offsets[i], parser.m_ObjectInfo[i].pos); + EXPECT_EQ(types[i], parser.m_ObjectInfo[i].type); + } + } + { + const unsigned char xref_table[] = + "xref \n" + "0 1 \n" + "0000000000 65535 f \n" + "3 1 \n" + "0000025325 00000 n \n" + "8 2 \n" + "0000025518 00002 n \n" + "0000025635 00000 n \n" + "12 1 \n" + "0000025777 00000 n \n" + "trail"; // Needed to end cross ref table reading. + CPDF_TestParser parser; + ASSERT_TRUE( + parser.InitTestFromBuffer(xref_table, FX_ArraySize(xref_table))); + + ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE)); + const FX_FILESIZE offsets[] = {0, 0, 0, 25325, 0, 0, 0, + 0, 25518, 25635, 0, 0, 25777}; + const uint8_t types[] = {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1}; + for (size_t i = 0; i < FX_ArraySize(offsets); ++i) { + EXPECT_EQ(offsets[i], parser.m_ObjectInfo[i].pos); + EXPECT_EQ(types[i], parser.m_ObjectInfo[i].type); + } + } + { + const unsigned char xref_table[] = + "xref \n" + "0 1 \n" + "0000000000 65535 f \n" + "3 1 \n" + "0000025325 00000 n \n" + "8 2 \n" + "0000000000 65535 f \n" + "0000025635 00000 n \n" + "12 1 \n" + "0000025777 00000 n \n" + "trail"; // Needed to end cross ref table reading. 
+ CPDF_TestParser parser; + ASSERT_TRUE( + parser.InitTestFromBuffer(xref_table, FX_ArraySize(xref_table))); + + ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE)); + const FX_FILESIZE offsets[] = {0, 0, 0, 25325, 0, 0, 0, + 0, 0, 25635, 0, 0, 25777}; + const uint8_t types[] = {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1}; + for (size_t i = 0; i < FX_ArraySize(offsets); ++i) { + EXPECT_EQ(offsets[i], parser.m_ObjectInfo[i].pos); + EXPECT_EQ(types[i], parser.m_ObjectInfo[i].type); + } + } + { + const unsigned char xref_table[] = + "xref \n" + "0 7 \n" + "0000000002 65535 f \n" + "0000000023 00000 n \n" + "0000000003 65535 f \n" + "0000000004 65535 f \n" + "0000000000 65535 f \n" + "0000000045 00000 n \n" + "0000000179 00000 n \n" + "trail"; // Needed to end cross ref table reading. + CPDF_TestParser parser; + ASSERT_TRUE( + parser.InitTestFromBuffer(xref_table, FX_ArraySize(xref_table))); + + ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE)); + const FX_FILESIZE offsets[] = {0, 23, 0, 0, 0, 45, 179}; + const uint8_t types[] = {0, 1, 0, 0, 0, 1, 1}; + for (size_t i = 0; i < FX_ArraySize(offsets); ++i) { + EXPECT_EQ(offsets[i], parser.m_ObjectInfo[i].pos); + EXPECT_EQ(types[i], parser.m_ObjectInfo[i].type); + } + } +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_reference.cpp b/core/fpdfapi/fpdf_parser/cpdf_reference.cpp new file mode 100644 index 0000000000..4aede7d07a --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_reference.cpp @@ -0,0 +1,72 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_reference.h" + +#include "core/include/fpdfapi/cpdf_indirect_object_holder.h" + +CPDF_Reference::CPDF_Reference(CPDF_IndirectObjectHolder* pDoc, int objnum) + : m_pObjList(pDoc), m_RefObjNum(objnum) {} + +CPDF_Reference::~CPDF_Reference() {} + +CPDF_Object::Type CPDF_Reference::GetType() const { + return REFERENCE; +} + +CFX_ByteString CPDF_Reference::GetString() const { + CPDF_Object* obj = SafeGetDirect(); + return obj ? obj->GetString() : CFX_ByteString(); +} + +CFX_ByteStringC CPDF_Reference::GetConstString() const { + CPDF_Object* obj = SafeGetDirect(); + return obj ? obj->GetConstString() : CFX_ByteStringC(); +} + +FX_FLOAT CPDF_Reference::GetNumber() const { + CPDF_Object* obj = SafeGetDirect(); + return obj ? obj->GetNumber() : 0; +} + +int CPDF_Reference::GetInteger() const { + CPDF_Object* obj = SafeGetDirect(); + return obj ? obj->GetInteger() : 0; +} + +CPDF_Dictionary* CPDF_Reference::GetDict() const { + CPDF_Object* obj = SafeGetDirect(); + return obj ? obj->GetDict() : nullptr; +} + +bool CPDF_Reference::IsReference() const { + return true; +} + +CPDF_Reference* CPDF_Reference::AsReference() { + return this; +} + +const CPDF_Reference* CPDF_Reference::AsReference() const { + return this; +} + +CPDF_Object* CPDF_Reference::Clone(FX_BOOL bDirect) const { + if (bDirect) { + auto* pDirect = GetDirect(); + return pDirect ? pDirect->Clone(TRUE) : nullptr; + } + return new CPDF_Reference(m_pObjList, m_RefObjNum); +} + +void CPDF_Reference::SetRef(CPDF_IndirectObjectHolder* pDoc, FX_DWORD objnum) { + m_pObjList = pDoc; + m_RefObjNum = objnum; +} + +CPDF_Object* CPDF_Reference::GetDirect() const { + return m_pObjList ? 
m_pObjList->GetIndirectObject(m_RefObjNum) : nullptr; +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp b/core/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp new file mode 100644 index 0000000000..90d4e0b5ab --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp @@ -0,0 +1,170 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_simple_parser.h" + +#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" + +CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) + : m_pData(pData), m_dwSize(dwSize), m_dwCurPos(0) {} + +CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str) + : m_pData(str.GetPtr()), m_dwSize(str.GetLength()), m_dwCurPos(0) {} + +void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) { + pStart = nullptr; + dwSize = 0; + uint8_t ch; + while (1) { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + while (PDFCharIsWhitespace(ch)) { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + } + + if (ch != '%') + break; + + while (1) { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + if (PDFCharIsLineEnding(ch)) + break; + } + } + + FX_DWORD start_pos = m_dwCurPos - 1; + pStart = m_pData + start_pos; + if (PDFCharIsDelimiter(ch)) { + if (ch == '/') { + while (1) { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { + m_dwCurPos--; + dwSize = m_dwCurPos - start_pos; + return; + } + } + } else { + dwSize = 1; + if (ch == '<') { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + if (ch == '<') + dwSize = 2; + else + m_dwCurPos--; + } else if (ch == '>') { + if (m_dwSize <= m_dwCurPos) + return; + ch = 
m_pData[m_dwCurPos++]; + if (ch == '>') + dwSize = 2; + else + m_dwCurPos--; + } + } + return; + } + + dwSize = 1; + while (1) { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { + m_dwCurPos--; + break; + } + dwSize++; + } +} + +CFX_ByteStringC CPDF_SimpleParser::GetWord() { + const uint8_t* pStart; + FX_DWORD dwSize; + ParseWord(pStart, dwSize); + if (dwSize == 1 && pStart[0] == '<') { + while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') { + m_dwCurPos++; + } + if (m_dwCurPos < m_dwSize) { + m_dwCurPos++; + } + return CFX_ByteStringC(pStart, + (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); + } + if (dwSize == 1 && pStart[0] == '(') { + int level = 1; + while (m_dwCurPos < m_dwSize) { + if (m_pData[m_dwCurPos] == ')') { + level--; + if (level == 0) { + break; + } + } + if (m_pData[m_dwCurPos] == '\\') { + if (m_dwSize <= m_dwCurPos) { + break; + } + m_dwCurPos++; + } else if (m_pData[m_dwCurPos] == '(') { + level++; + } + if (m_dwSize <= m_dwCurPos) { + break; + } + m_dwCurPos++; + } + if (m_dwCurPos < m_dwSize) { + m_dwCurPos++; + } + return CFX_ByteStringC(pStart, + (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); + } + return CFX_ByteStringC(pStart, dwSize); +} + +bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token, + int nParams) { + nParams++; + FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams); + int buf_index = 0; + int buf_count = 0; + m_dwCurPos = 0; + while (1) { + pBuf[buf_index++] = m_dwCurPos; + if (buf_index == nParams) { + buf_index = 0; + } + buf_count++; + if (buf_count > nParams) { + buf_count = nParams; + } + CFX_ByteStringC word = GetWord(); + if (word.IsEmpty()) { + FX_Free(pBuf); + return false; + } + if (word == token) { + if (buf_count < nParams) { + continue; + } + m_dwCurPos = pBuf[buf_index]; + FX_Free(pBuf); + return true; + } + } + return false; +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_simple_parser_unittest.cpp 
b/core/fpdfapi/fpdf_parser/cpdf_simple_parser_unittest.cpp new file mode 100644 index 0000000000..a9acff4155 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_simple_parser_unittest.cpp @@ -0,0 +1,96 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/include/fpdfapi/cpdf_simple_parser.h" + +#include <string> + +#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" +#include "core/include/fxcrt/fx_basic.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" + +TEST(SimpleParserTest, GetWord) { + pdfium::StrFuncTestData test_data[] = { + // Empty src string. + STR_IN_OUT_CASE("", ""), + // Content with whitespaces only. + STR_IN_OUT_CASE(" \t \0 \n", ""), + // Content with comments only. + STR_IN_OUT_CASE("%this is a test case\r\n%2nd line", ""), + // Mixed whitespaces and comments. + STR_IN_OUT_CASE(" \t \0%try()%haha\n %another line \aa", ""), + // Name. + STR_IN_OUT_CASE(" /Tester ", "/Tester"), + // String. + STR_IN_OUT_CASE("\t(nice day)!\n ", "(nice day)"), + // String with nested braces. + STR_IN_OUT_CASE("\t(It is a (long) day)!\n ", "(It is a (long) day)"), + // String with escaped chars. + STR_IN_OUT_CASE("\t(It is a \\(long\\) day!)hi\n ", + "(It is a \\(long\\) day!)"), + // Hex string. + STR_IN_OUT_CASE(" \n<4545acdfedertt>abc ", "<4545acdfedertt>"), + STR_IN_OUT_CASE(" \n<4545a<ed>ertt>abc ", "<4545a<ed>"), + // Dictionary. + STR_IN_OUT_CASE("<</oc 234 /color 2 3 R>>", "<<"), + STR_IN_OUT_CASE("\t\t<< /abc>>", "<<"), + // Handling ending delimiters. + STR_IN_OUT_CASE("> little bear", ">"), + STR_IN_OUT_CASE(") another bear", ")"), STR_IN_OUT_CASE(">> end ", ">>"), + // No ending delimiters. + STR_IN_OUT_CASE("(sdfgfgbcv", "(sdfgfgbcv"), + // Regular cases. 
+ STR_IN_OUT_CASE("apple pear", "apple"), + STR_IN_OUT_CASE(" pi=3.1415 ", "pi=3.1415"), + STR_IN_OUT_CASE(" p t x c ", "p"), STR_IN_OUT_CASE(" pt\0xc ", "pt"), + STR_IN_OUT_CASE(" $^&&*\t\0sdff ", "$^&&*"), + STR_IN_OUT_CASE("\n\r+3.5656 -11.0", "+3.5656"), + }; + for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { + const pdfium::StrFuncTestData& data = test_data[i]; + CPDF_SimpleParser parser(data.input, data.input_size); + CFX_ByteStringC word = parser.GetWord(); + EXPECT_EQ(std::string(reinterpret_cast<const char*>(data.expected), + data.expected_size), + std::string(word.GetCStr(), word.GetLength())) + << " for case " << i; + } +} + +TEST(SimpleParserTest, FindTagParamFromStart) { + struct FindTagTestStruct { + const unsigned char* input; + unsigned int input_size; + const char* token; + int num_params; + bool result; + unsigned int result_pos; + } test_data[] = { + // Empty strings. + STR_IN_TEST_CASE("", "Tj", 1, false, 0), + STR_IN_TEST_CASE("", "", 1, false, 0), + // Empty token. + STR_IN_TEST_CASE(" T j", "", 1, false, 5), + // No parameter. + STR_IN_TEST_CASE("Tj", "Tj", 1, false, 2), + STR_IN_TEST_CASE("(Tj", "Tj", 1, false, 3), + // Partial token match. + STR_IN_TEST_CASE("\r12\t34 56 78Tj", "Tj", 1, false, 15), + // Regular cases with various parameters. 
+ STR_IN_TEST_CASE("\r\0abd Tj", "Tj", 1, true, 0), + STR_IN_TEST_CASE("12 4 Tj 3 46 Tj", "Tj", 1, true, 2), + STR_IN_TEST_CASE("er^ 2 (34) (5667) Tj", "Tj", 2, true, 5), + STR_IN_TEST_CASE("<344> (232)\t343.4\n12 45 Tj", "Tj", 3, true, 11), + STR_IN_TEST_CASE("1 2 3 4 5 6 7 8 cm", "cm", 6, true, 3), + }; + for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { + const FindTagTestStruct& data = test_data[i]; + CPDF_SimpleParser parser(data.input, data.input_size); + EXPECT_EQ(data.result, + parser.FindTagParamFromStart(data.token, data.num_params)) + << " for case " << i; + EXPECT_EQ(data.result_pos, parser.GetCurPos()) << " for case " << i; + } +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_standard_crypto_handler.cpp b/core/fpdfapi/fpdf_parser/cpdf_standard_crypto_handler.cpp new file mode 100644 index 0000000000..b73238997c --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_standard_crypto_handler.cpp @@ -0,0 +1,342 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/fpdfapi/fpdf_parser/cpdf_standard_crypto_handler.h" + +#include <time.h> + +#include "core/include/fdrm/fx_crypt.h" +#include "core/include/fpdfapi/cpdf_parser.h" +#include "core/include/fpdfapi/cpdf_simple_parser.h" +#include "core/include/fpdfapi/ipdf_security_handler.h" + +IPDF_CryptoHandler::~IPDF_CryptoHandler() {} + +void IPDF_CryptoHandler::Decrypt(FX_DWORD objnum, + FX_DWORD gennum, + CFX_ByteString& str) { + CFX_BinaryBuf dest_buf; + void* context = DecryptStart(objnum, gennum); + DecryptStream(context, (const uint8_t*)str, str.GetLength(), dest_buf); + DecryptFinish(context, dest_buf); + str = dest_buf; +} + +void CPDF_StandardCryptoHandler::CryptBlock(FX_BOOL bEncrypt, + FX_DWORD objnum, + FX_DWORD gennum, + const uint8_t* src_buf, + FX_DWORD src_size, + uint8_t* dest_buf, + FX_DWORD& dest_size) { + if (m_Cipher == FXCIPHER_NONE) { + FXSYS_memcpy(dest_buf, src_buf, src_size); + return; + } + uint8_t realkey[16]; + int realkeylen = 16; + if (m_Cipher != FXCIPHER_AES || m_KeyLen != 32) { + uint8_t key1[32]; + FXSYS_memcpy(key1, m_EncryptKey, m_KeyLen); + key1[m_KeyLen + 0] = (uint8_t)objnum; + key1[m_KeyLen + 1] = (uint8_t)(objnum >> 8); + key1[m_KeyLen + 2] = (uint8_t)(objnum >> 16); + key1[m_KeyLen + 3] = (uint8_t)gennum; + key1[m_KeyLen + 4] = (uint8_t)(gennum >> 8); + FXSYS_memcpy(key1 + m_KeyLen, &objnum, 3); + FXSYS_memcpy(key1 + m_KeyLen + 3, &gennum, 2); + if (m_Cipher == FXCIPHER_AES) { + FXSYS_memcpy(key1 + m_KeyLen + 5, "sAlT", 4); + } + CRYPT_MD5Generate( + key1, m_Cipher == FXCIPHER_AES ? m_KeyLen + 9 : m_KeyLen + 5, realkey); + realkeylen = m_KeyLen + 5; + if (realkeylen > 16) { + realkeylen = 16; + } + } + if (m_Cipher == FXCIPHER_AES) { + CRYPT_AESSetKey(m_pAESContext, 16, m_KeyLen == 32 ? 
m_EncryptKey : realkey, + m_KeyLen, bEncrypt); + if (bEncrypt) { + uint8_t iv[16]; + for (int i = 0; i < 16; i++) { + iv[i] = (uint8_t)rand(); + } + CRYPT_AESSetIV(m_pAESContext, iv); + FXSYS_memcpy(dest_buf, iv, 16); + int nblocks = src_size / 16; + CRYPT_AESEncrypt(m_pAESContext, dest_buf + 16, src_buf, nblocks * 16); + uint8_t padding[16]; + FXSYS_memcpy(padding, src_buf + nblocks * 16, src_size % 16); + FXSYS_memset(padding + src_size % 16, 16 - src_size % 16, + 16 - src_size % 16); + CRYPT_AESEncrypt(m_pAESContext, dest_buf + nblocks * 16 + 16, padding, + 16); + dest_size = 32 + nblocks * 16; + } else { + CRYPT_AESSetIV(m_pAESContext, src_buf); + CRYPT_AESDecrypt(m_pAESContext, dest_buf, src_buf + 16, src_size - 16); + dest_size = src_size - 16; + dest_size -= dest_buf[dest_size - 1]; + } + } else { + ASSERT(dest_size == src_size); + if (dest_buf != src_buf) { + FXSYS_memcpy(dest_buf, src_buf, src_size); + } + CRYPT_ArcFourCryptBlock(dest_buf, dest_size, realkey, realkeylen); + } +} + +struct AESCryptContext { + uint8_t m_Context[2048]; + FX_BOOL m_bIV; + uint8_t m_Block[16]; + FX_DWORD m_BlockOffset; +}; + +void* CPDF_StandardCryptoHandler::CryptStart(FX_DWORD objnum, + FX_DWORD gennum, + FX_BOOL bEncrypt) { + if (m_Cipher == FXCIPHER_NONE) { + return this; + } + if (m_Cipher == FXCIPHER_AES && m_KeyLen == 32) { + AESCryptContext* pContext = FX_Alloc(AESCryptContext, 1); + pContext->m_bIV = TRUE; + pContext->m_BlockOffset = 0; + CRYPT_AESSetKey(pContext->m_Context, 16, m_EncryptKey, 32, bEncrypt); + if (bEncrypt) { + for (int i = 0; i < 16; i++) { + pContext->m_Block[i] = (uint8_t)rand(); + } + CRYPT_AESSetIV(pContext->m_Context, pContext->m_Block); + } + return pContext; + } + uint8_t key1[48]; + FXSYS_memcpy(key1, m_EncryptKey, m_KeyLen); + FXSYS_memcpy(key1 + m_KeyLen, &objnum, 3); + FXSYS_memcpy(key1 + m_KeyLen + 3, &gennum, 2); + if (m_Cipher == FXCIPHER_AES) { + FXSYS_memcpy(key1 + m_KeyLen + 5, "sAlT", 4); + } + uint8_t realkey[16]; + 
CRYPT_MD5Generate( + key1, m_Cipher == FXCIPHER_AES ? m_KeyLen + 9 : m_KeyLen + 5, realkey); + int realkeylen = m_KeyLen + 5; + if (realkeylen > 16) { + realkeylen = 16; + } + if (m_Cipher == FXCIPHER_AES) { + AESCryptContext* pContext = FX_Alloc(AESCryptContext, 1); + pContext->m_bIV = TRUE; + pContext->m_BlockOffset = 0; + CRYPT_AESSetKey(pContext->m_Context, 16, realkey, 16, bEncrypt); + if (bEncrypt) { + for (int i = 0; i < 16; i++) { + pContext->m_Block[i] = (uint8_t)rand(); + } + CRYPT_AESSetIV(pContext->m_Context, pContext->m_Block); + } + return pContext; + } + void* pContext = FX_Alloc(uint8_t, 1040); + CRYPT_ArcFourSetup(pContext, realkey, realkeylen); + return pContext; +} +FX_BOOL CPDF_StandardCryptoHandler::CryptStream(void* context, + const uint8_t* src_buf, + FX_DWORD src_size, + CFX_BinaryBuf& dest_buf, + FX_BOOL bEncrypt) { + if (!context) { + return FALSE; + } + if (m_Cipher == FXCIPHER_NONE) { + dest_buf.AppendBlock(src_buf, src_size); + return TRUE; + } + if (m_Cipher == FXCIPHER_RC4) { + int old_size = dest_buf.GetSize(); + dest_buf.AppendBlock(src_buf, src_size); + CRYPT_ArcFourCrypt(context, dest_buf.GetBuffer() + old_size, src_size); + return TRUE; + } + AESCryptContext* pContext = (AESCryptContext*)context; + if (pContext->m_bIV && bEncrypt) { + dest_buf.AppendBlock(pContext->m_Block, 16); + pContext->m_bIV = FALSE; + } + FX_DWORD src_off = 0; + FX_DWORD src_left = src_size; + while (1) { + FX_DWORD copy_size = 16 - pContext->m_BlockOffset; + if (copy_size > src_left) { + copy_size = src_left; + } + FXSYS_memcpy(pContext->m_Block + pContext->m_BlockOffset, src_buf + src_off, + copy_size); + src_off += copy_size; + src_left -= copy_size; + pContext->m_BlockOffset += copy_size; + if (pContext->m_BlockOffset == 16) { + if (!bEncrypt && pContext->m_bIV) { + CRYPT_AESSetIV(pContext->m_Context, pContext->m_Block); + pContext->m_bIV = FALSE; + pContext->m_BlockOffset = 0; + } else if (src_off < src_size) { + uint8_t block_buf[16]; + if (bEncrypt) 
{ + CRYPT_AESEncrypt(pContext->m_Context, block_buf, pContext->m_Block, + 16); + } else { + CRYPT_AESDecrypt(pContext->m_Context, block_buf, pContext->m_Block, + 16); + } + dest_buf.AppendBlock(block_buf, 16); + pContext->m_BlockOffset = 0; + } + } + if (!src_left) { + break; + } + } + return TRUE; +} +FX_BOOL CPDF_StandardCryptoHandler::CryptFinish(void* context, + CFX_BinaryBuf& dest_buf, + FX_BOOL bEncrypt) { + if (!context) { + return FALSE; + } + if (m_Cipher == FXCIPHER_NONE) { + return TRUE; + } + if (m_Cipher == FXCIPHER_RC4) { + FX_Free(context); + return TRUE; + } + AESCryptContext* pContext = (AESCryptContext*)context; + if (bEncrypt) { + uint8_t block_buf[16]; + if (pContext->m_BlockOffset == 16) { + CRYPT_AESEncrypt(pContext->m_Context, block_buf, pContext->m_Block, 16); + dest_buf.AppendBlock(block_buf, 16); + pContext->m_BlockOffset = 0; + } + FXSYS_memset(pContext->m_Block + pContext->m_BlockOffset, + (uint8_t)(16 - pContext->m_BlockOffset), + 16 - pContext->m_BlockOffset); + CRYPT_AESEncrypt(pContext->m_Context, block_buf, pContext->m_Block, 16); + dest_buf.AppendBlock(block_buf, 16); + } else if (pContext->m_BlockOffset == 16) { + uint8_t block_buf[16]; + CRYPT_AESDecrypt(pContext->m_Context, block_buf, pContext->m_Block, 16); + if (block_buf[15] <= 16) { + dest_buf.AppendBlock(block_buf, 16 - block_buf[15]); + } + } + FX_Free(pContext); + return TRUE; +} +void* CPDF_StandardCryptoHandler::DecryptStart(FX_DWORD objnum, + FX_DWORD gennum) { + return CryptStart(objnum, gennum, FALSE); +} +FX_DWORD CPDF_StandardCryptoHandler::DecryptGetSize(FX_DWORD src_size) { + return m_Cipher == FXCIPHER_AES ? 
src_size - 16 : src_size; +} + +FX_BOOL CPDF_StandardCryptoHandler::Init( + CPDF_Dictionary* pEncryptDict, + IPDF_SecurityHandler* pSecurityHandler) { + const uint8_t* key; + if (!pSecurityHandler->GetCryptInfo(m_Cipher, key, m_KeyLen)) { + return FALSE; + } + if (m_KeyLen > 32 || m_KeyLen < 0) { + return FALSE; + } + if (m_Cipher != FXCIPHER_NONE) { + FXSYS_memcpy(m_EncryptKey, key, m_KeyLen); + } + if (m_Cipher == FXCIPHER_AES) { + m_pAESContext = FX_Alloc(uint8_t, 2048); + } + return TRUE; +} + +FX_BOOL CPDF_StandardCryptoHandler::Init(int cipher, + const uint8_t* key, + int keylen) { + if (cipher == FXCIPHER_AES) { + switch (keylen) { + case 16: + case 24: + case 32: + break; + default: + return FALSE; + } + } else if (cipher == FXCIPHER_AES2) { + if (keylen != 32) { + return FALSE; + } + } else if (cipher == FXCIPHER_RC4) { + if (keylen < 5 || keylen > 16) { + return FALSE; + } + } else { + if (keylen > 32) { + keylen = 32; + } + } + m_Cipher = cipher; + m_KeyLen = keylen; + FXSYS_memcpy(m_EncryptKey, key, keylen); + if (m_Cipher == FXCIPHER_AES) { + m_pAESContext = FX_Alloc(uint8_t, 2048); + } + return TRUE; +} +FX_BOOL CPDF_StandardCryptoHandler::DecryptStream(void* context, + const uint8_t* src_buf, + FX_DWORD src_size, + CFX_BinaryBuf& dest_buf) { + return CryptStream(context, src_buf, src_size, dest_buf, FALSE); +} +FX_BOOL CPDF_StandardCryptoHandler::DecryptFinish(void* context, + CFX_BinaryBuf& dest_buf) { + return CryptFinish(context, dest_buf, FALSE); +} +FX_DWORD CPDF_StandardCryptoHandler::EncryptGetSize(FX_DWORD objnum, + FX_DWORD version, + const uint8_t* src_buf, + FX_DWORD src_size) { + if (m_Cipher == FXCIPHER_AES) { + return src_size + 32; + } + return src_size; +} +FX_BOOL CPDF_StandardCryptoHandler::EncryptContent(FX_DWORD objnum, + FX_DWORD gennum, + const uint8_t* src_buf, + FX_DWORD src_size, + uint8_t* dest_buf, + FX_DWORD& dest_size) { + CryptBlock(TRUE, objnum, gennum, src_buf, src_size, dest_buf, dest_size); + return TRUE; +} 
+CPDF_StandardCryptoHandler::CPDF_StandardCryptoHandler() { + m_pAESContext = NULL; + m_Cipher = FXCIPHER_NONE; + m_KeyLen = 0; +} +CPDF_StandardCryptoHandler::~CPDF_StandardCryptoHandler() { + FX_Free(m_pAESContext); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_standard_crypto_handler.h b/core/fpdfapi/fpdf_parser/cpdf_standard_crypto_handler.h new file mode 100644 index 0000000000..28b7503f6e --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_standard_crypto_handler.h @@ -0,0 +1,64 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FPDFAPI_FPDF_PARSER_CPDF_STANDARD_CRYPTO_HANDLER_H_ +#define CORE_FPDFAPI_FPDF_PARSER_CPDF_STANDARD_CRYPTO_HANDLER_H_ + +#include "core/include/fpdfapi/ipdf_crypto_handler.h" + +class CPDF_StandardCryptoHandler : public IPDF_CryptoHandler { + public: + CPDF_StandardCryptoHandler(); + ~CPDF_StandardCryptoHandler() override; + + // IPDF_CryptoHandler + FX_BOOL Init(CPDF_Dictionary* pEncryptDict, + IPDF_SecurityHandler* pSecurityHandler) override; + FX_DWORD DecryptGetSize(FX_DWORD src_size) override; + void* DecryptStart(FX_DWORD objnum, FX_DWORD gennum) override; + FX_BOOL DecryptStream(void* context, + const uint8_t* src_buf, + FX_DWORD src_size, + CFX_BinaryBuf& dest_buf) override; + FX_BOOL DecryptFinish(void* context, CFX_BinaryBuf& dest_buf) override; + FX_DWORD EncryptGetSize(FX_DWORD objnum, + FX_DWORD version, + const uint8_t* src_buf, + FX_DWORD src_size) override; + FX_BOOL EncryptContent(FX_DWORD objnum, + FX_DWORD version, + const uint8_t* src_buf, + FX_DWORD src_size, + uint8_t* dest_buf, + FX_DWORD& dest_size) override; + + FX_BOOL Init(int cipher, const uint8_t* key, int keylen); + + protected: + virtual void CryptBlock(FX_BOOL bEncrypt, + FX_DWORD objnum, + FX_DWORD gennum, + const uint8_t* src_buf, + FX_DWORD 
src_size, + uint8_t* dest_buf, + FX_DWORD& dest_size); + virtual void* CryptStart(FX_DWORD objnum, FX_DWORD gennum, FX_BOOL bEncrypt); + virtual FX_BOOL CryptStream(void* context, + const uint8_t* src_buf, + FX_DWORD src_size, + CFX_BinaryBuf& dest_buf, + FX_BOOL bEncrypt); + virtual FX_BOOL CryptFinish(void* context, + CFX_BinaryBuf& dest_buf, + FX_BOOL bEncrypt); + + uint8_t m_EncryptKey[32]; + int m_KeyLen; + int m_Cipher; + uint8_t* m_pAESContext; +}; + +#endif // CORE_FPDFAPI_FPDF_PARSER_CPDF_STANDARD_CRYPTO_HANDLER_H_ diff --git a/core/fpdfapi/fpdf_parser/cpdf_standard_security_handler.cpp b/core/fpdfapi/fpdf_parser/cpdf_standard_security_handler.cpp new file mode 100644 index 0000000000..e5eb4c8469 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_standard_security_handler.cpp @@ -0,0 +1,714 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/fpdfapi/fpdf_parser/cpdf_standard_security_handler.h" + +#include <time.h> + +#include "core/fpdfapi/fpdf_parser/cpdf_standard_crypto_handler.h" +#include "core/include/fdrm/fx_crypt.h" +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_object.h" +#include "core/include/fpdfapi/cpdf_parser.h" + +namespace { + +const uint8_t defpasscode[32] = { + 0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41, 0x64, 0x00, 0x4e, + 0x56, 0xff, 0xfa, 0x01, 0x08, 0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, + 0x3e, 0x80, 0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a}; + +void CalcEncryptKey(CPDF_Dictionary* pEncrypt, + const uint8_t* password, + FX_DWORD pass_size, + uint8_t* key, + int keylen, + FX_BOOL bIgnoreMeta, + CPDF_Array* pIdArray) { + int revision = pEncrypt->GetIntegerBy("R"); + uint8_t passcode[32]; + for (FX_DWORD i = 0; i < 32; i++) { + passcode[i] = i < pass_size ? password[i] : defpasscode[i - pass_size]; + } + uint8_t md5[100]; + CRYPT_MD5Start(md5); + CRYPT_MD5Update(md5, passcode, 32); + CFX_ByteString okey = pEncrypt->GetStringBy("O"); + CRYPT_MD5Update(md5, (uint8_t*)okey.c_str(), okey.GetLength()); + FX_DWORD perm = pEncrypt->GetIntegerBy("P"); + CRYPT_MD5Update(md5, (uint8_t*)&perm, 4); + if (pIdArray) { + CFX_ByteString id = pIdArray->GetStringAt(0); + CRYPT_MD5Update(md5, (uint8_t*)id.c_str(), id.GetLength()); + } + if (!bIgnoreMeta && revision >= 3 && + !pEncrypt->GetIntegerBy("EncryptMetadata", 1)) { + FX_DWORD tag = (FX_DWORD)-1; + CRYPT_MD5Update(md5, (uint8_t*)&tag, 4); + } + uint8_t digest[16]; + CRYPT_MD5Finish(md5, digest); + FX_DWORD copy_len = keylen; + if (copy_len > sizeof(digest)) { + copy_len = sizeof(digest); + } + if (revision >= 3) { + for (int i = 0; i < 50; i++) { + CRYPT_MD5Generate(digest, copy_len, digest); + } + } + FXSYS_memset(key, 0, keylen); + FXSYS_memcpy(key, digest, copy_len); +} + +} // namespace + 
+IPDF_SecurityHandler::~IPDF_SecurityHandler() {} + +CPDF_StandardSecurityHandler::CPDF_StandardSecurityHandler() { + m_Version = 0; + m_Revision = 0; + m_pParser = NULL; + m_pEncryptDict = NULL; + m_Permissions = 0; + m_Cipher = FXCIPHER_NONE; + m_KeyLen = 0; +} + +CPDF_StandardSecurityHandler::~CPDF_StandardSecurityHandler() {} + +IPDF_CryptoHandler* CPDF_StandardSecurityHandler::CreateCryptoHandler() { + return new CPDF_StandardCryptoHandler; +} + +FX_BOOL CPDF_StandardSecurityHandler::OnInit(CPDF_Parser* pParser, + CPDF_Dictionary* pEncryptDict) { + m_pParser = pParser; + if (!LoadDict(pEncryptDict)) { + return FALSE; + } + if (m_Cipher == FXCIPHER_NONE) { + return TRUE; + } + return CheckSecurity(m_KeyLen); +} +FX_BOOL CPDF_StandardSecurityHandler::CheckSecurity(int32_t key_len) { + CFX_ByteString password = m_pParser->GetPassword(); + if (CheckPassword(password, password.GetLength(), TRUE, m_EncryptKey, + key_len)) { + if (password.IsEmpty()) { + if (!CheckPassword(password, password.GetLength(), FALSE, m_EncryptKey, + key_len)) { + return FALSE; + } + } + return TRUE; + } + return CheckPassword(password, password.GetLength(), FALSE, m_EncryptKey, + key_len); +} +FX_DWORD CPDF_StandardSecurityHandler::GetPermissions() { + return m_Permissions; +} +static FX_BOOL _LoadCryptInfo(CPDF_Dictionary* pEncryptDict, + const CFX_ByteStringC& name, + int& cipher, + int& keylen) { + int Version = pEncryptDict->GetIntegerBy("V"); + cipher = FXCIPHER_RC4; + keylen = 0; + if (Version >= 4) { + CPDF_Dictionary* pCryptFilters = pEncryptDict->GetDictBy("CF"); + if (!pCryptFilters) { + return FALSE; + } + if (name == "Identity") { + cipher = FXCIPHER_NONE; + } else { + CPDF_Dictionary* pDefFilter = pCryptFilters->GetDictBy(name); + if (!pDefFilter) { + return FALSE; + } + int nKeyBits = 0; + if (Version == 4) { + nKeyBits = pDefFilter->GetIntegerBy("Length", 0); + if (nKeyBits == 0) { + nKeyBits = pEncryptDict->GetIntegerBy("Length", 128); + } + } else { + nKeyBits = 
pEncryptDict->GetIntegerBy("Length", 256); + } + if (nKeyBits < 40) { + nKeyBits *= 8; + } + keylen = nKeyBits / 8; + CFX_ByteString cipher_name = pDefFilter->GetStringBy("CFM"); + if (cipher_name == "AESV2" || cipher_name == "AESV3") { + cipher = FXCIPHER_AES; + } + } + } else { + keylen = Version > 1 ? pEncryptDict->GetIntegerBy("Length", 40) / 8 : 5; + } + if (keylen > 32 || keylen < 0) { + return FALSE; + } + return TRUE; +} + +FX_BOOL CPDF_StandardSecurityHandler::LoadDict(CPDF_Dictionary* pEncryptDict) { + m_pEncryptDict = pEncryptDict; + m_Version = pEncryptDict->GetIntegerBy("V"); + m_Revision = pEncryptDict->GetIntegerBy("R"); + m_Permissions = pEncryptDict->GetIntegerBy("P", -1); + if (m_Version < 4) { + return _LoadCryptInfo(pEncryptDict, CFX_ByteString(), m_Cipher, m_KeyLen); + } + CFX_ByteString stmf_name = pEncryptDict->GetStringBy("StmF"); + CFX_ByteString strf_name = pEncryptDict->GetStringBy("StrF"); + if (stmf_name != strf_name) { + return FALSE; + } + if (!_LoadCryptInfo(pEncryptDict, strf_name, m_Cipher, m_KeyLen)) { + return FALSE; + } + return TRUE; +} + +FX_BOOL CPDF_StandardSecurityHandler::LoadDict(CPDF_Dictionary* pEncryptDict, + FX_DWORD type, + int& cipher, + int& key_len) { + m_pEncryptDict = pEncryptDict; + m_Version = pEncryptDict->GetIntegerBy("V"); + m_Revision = pEncryptDict->GetIntegerBy("R"); + m_Permissions = pEncryptDict->GetIntegerBy("P", -1); + CFX_ByteString strf_name, stmf_name; + if (m_Version >= 4) { + stmf_name = pEncryptDict->GetStringBy("StmF"); + strf_name = pEncryptDict->GetStringBy("StrF"); + if (stmf_name != strf_name) { + return FALSE; + } + } + if (!_LoadCryptInfo(pEncryptDict, strf_name, cipher, key_len)) { + return FALSE; + } + m_Cipher = cipher; + m_KeyLen = key_len; + return TRUE; +} + +FX_BOOL CPDF_StandardSecurityHandler::GetCryptInfo(int& cipher, + const uint8_t*& buffer, + int& keylen) { + cipher = m_Cipher; + buffer = m_EncryptKey; + keylen = m_KeyLen; + return TRUE; +} +#define FX_GET_32WORD(n, b, i) \ 
+ { \ + (n) = (FX_DWORD)( \ + ((uint64_t)(b)[(i)] << 24) | ((uint64_t)(b)[(i) + 1] << 16) | \ + ((uint64_t)(b)[(i) + 2] << 8) | ((uint64_t)(b)[(i) + 3])); \ + } +int BigOrder64BitsMod3(uint8_t* data) { + uint64_t ret = 0; + for (int i = 0; i < 4; ++i) { + FX_DWORD value; + FX_GET_32WORD(value, data, 4 * i); + ret <<= 32; + ret |= value; + ret %= 3; + } + return (int)ret; +} +void Revision6_Hash(const uint8_t* password, + FX_DWORD size, + const uint8_t* salt, + const uint8_t* vector, + uint8_t* hash) { + int iBlockSize = 32; + uint8_t sha[128]; + CRYPT_SHA256Start(sha); + CRYPT_SHA256Update(sha, password, size); + CRYPT_SHA256Update(sha, salt, 8); + if (vector) { + CRYPT_SHA256Update(sha, vector, 48); + } + uint8_t digest[32]; + CRYPT_SHA256Finish(sha, digest); + CFX_ByteTextBuf buf; + uint8_t* input = digest; + uint8_t* key = input; + uint8_t* iv = input + 16; + uint8_t* E = buf.GetBuffer(); + int iBufLen = buf.GetLength(); + CFX_ByteTextBuf interDigest; + int i = 0; + uint8_t* aes = FX_Alloc(uint8_t, 2048); + while (i < 64 || i < E[iBufLen - 1] + 32) { + int iRoundSize = size + iBlockSize; + if (vector) { + iRoundSize += 48; + } + iBufLen = iRoundSize * 64; + buf.EstimateSize(iBufLen); + E = buf.GetBuffer(); + CFX_ByteTextBuf content; + for (int j = 0; j < 64; ++j) { + content.AppendBlock(password, size); + content.AppendBlock(input, iBlockSize); + if (vector) { + content.AppendBlock(vector, 48); + } + } + CRYPT_AESSetKey(aes, 16, key, 16, TRUE); + CRYPT_AESSetIV(aes, iv); + CRYPT_AESEncrypt(aes, E, content.GetBuffer(), iBufLen); + int iHash = 0; + switch (BigOrder64BitsMod3(E)) { + case 0: + iHash = 0; + iBlockSize = 32; + break; + case 1: + iHash = 1; + iBlockSize = 48; + break; + default: + iHash = 2; + iBlockSize = 64; + break; + } + interDigest.EstimateSize(iBlockSize); + input = interDigest.GetBuffer(); + if (iHash == 0) { + CRYPT_SHA256Generate(E, iBufLen, input); + } else if (iHash == 1) { + CRYPT_SHA384Generate(E, iBufLen, input); + } else if (iHash == 2) 
{ + CRYPT_SHA512Generate(E, iBufLen, input); + } + key = input; + iv = input + 16; + ++i; + } + FX_Free(aes); + if (hash) { + FXSYS_memcpy(hash, input, 32); + } +} +FX_BOOL CPDF_StandardSecurityHandler::AES256_CheckPassword( + const uint8_t* password, + FX_DWORD size, + FX_BOOL bOwner, + uint8_t* key) { + CFX_ByteString okey = + m_pEncryptDict ? m_pEncryptDict->GetStringBy("O") : CFX_ByteString(); + if (okey.GetLength() < 48) { + return FALSE; + } + CFX_ByteString ukey = + m_pEncryptDict ? m_pEncryptDict->GetStringBy("U") : CFX_ByteString(); + if (ukey.GetLength() < 48) { + return FALSE; + } + const uint8_t* pkey = bOwner ? (const uint8_t*)okey : (const uint8_t*)ukey; + uint8_t sha[128]; + uint8_t digest[32]; + if (m_Revision >= 6) { + Revision6_Hash(password, size, (const uint8_t*)pkey + 32, + (bOwner ? (const uint8_t*)ukey : NULL), digest); + } else { + CRYPT_SHA256Start(sha); + CRYPT_SHA256Update(sha, password, size); + CRYPT_SHA256Update(sha, pkey + 32, 8); + if (bOwner) { + CRYPT_SHA256Update(sha, ukey, 48); + } + CRYPT_SHA256Finish(sha, digest); + } + if (FXSYS_memcmp(digest, pkey, 32) != 0) { + return FALSE; + } + if (!key) { + return TRUE; + } + if (m_Revision >= 6) { + Revision6_Hash(password, size, (const uint8_t*)pkey + 40, + (bOwner ? (const uint8_t*)ukey : NULL), digest); + } else { + CRYPT_SHA256Start(sha); + CRYPT_SHA256Update(sha, password, size); + CRYPT_SHA256Update(sha, pkey + 40, 8); + if (bOwner) { + CRYPT_SHA256Update(sha, ukey, 48); + } + CRYPT_SHA256Finish(sha, digest); + } + CFX_ByteString ekey = m_pEncryptDict + ? m_pEncryptDict->GetStringBy(bOwner ? 
"OE" : "UE") + : CFX_ByteString(); + if (ekey.GetLength() < 32) { + return FALSE; + } + uint8_t* aes = FX_Alloc(uint8_t, 2048); + CRYPT_AESSetKey(aes, 16, digest, 32, FALSE); + uint8_t iv[16]; + FXSYS_memset(iv, 0, 16); + CRYPT_AESSetIV(aes, iv); + CRYPT_AESDecrypt(aes, key, ekey, 32); + CRYPT_AESSetKey(aes, 16, key, 32, FALSE); + CRYPT_AESSetIV(aes, iv); + CFX_ByteString perms = m_pEncryptDict->GetStringBy("Perms"); + if (perms.IsEmpty()) { + return FALSE; + } + uint8_t perms_buf[16]; + FXSYS_memset(perms_buf, 0, sizeof(perms_buf)); + FX_DWORD copy_len = sizeof(perms_buf); + if (copy_len > (FX_DWORD)perms.GetLength()) { + copy_len = perms.GetLength(); + } + FXSYS_memcpy(perms_buf, (const uint8_t*)perms, copy_len); + uint8_t buf[16]; + CRYPT_AESDecrypt(aes, buf, perms_buf, 16); + FX_Free(aes); + if (buf[9] != 'a' || buf[10] != 'd' || buf[11] != 'b') { + return FALSE; + } + if (FXDWORD_GET_LSBFIRST(buf) != m_Permissions) { + return FALSE; + } + if ((buf[8] == 'T' && !IsMetadataEncrypted()) || + (buf[8] == 'F' && IsMetadataEncrypted())) { + return FALSE; + } + return TRUE; +} + +int CPDF_StandardSecurityHandler::CheckPassword(const uint8_t* password, + FX_DWORD size, + FX_BOOL bOwner, + uint8_t* key, + int32_t key_len) { + if (m_Revision >= 5) { + return AES256_CheckPassword(password, size, bOwner, key); + } + uint8_t keybuf[32]; + if (!key) { + key = keybuf; + } + if (bOwner) { + return CheckOwnerPassword(password, size, key, key_len); + } + return CheckUserPassword(password, size, FALSE, key, key_len) || + CheckUserPassword(password, size, TRUE, key, key_len); +} +FX_BOOL CPDF_StandardSecurityHandler::CheckUserPassword( + const uint8_t* password, + FX_DWORD pass_size, + FX_BOOL bIgnoreEncryptMeta, + uint8_t* key, + int32_t key_len) { + CalcEncryptKey(m_pEncryptDict, password, pass_size, key, key_len, + bIgnoreEncryptMeta, m_pParser->GetIDArray()); + CFX_ByteString ukey = + m_pEncryptDict ? 
m_pEncryptDict->GetStringBy("U") : CFX_ByteString(); + if (ukey.GetLength() < 16) { + return FALSE; + } + uint8_t ukeybuf[32]; + if (m_Revision == 2) { + FXSYS_memcpy(ukeybuf, defpasscode, 32); + CRYPT_ArcFourCryptBlock(ukeybuf, 32, key, key_len); + } else { + uint8_t test[32], tmpkey[32]; + FX_DWORD copy_len = sizeof(test); + if (copy_len > (FX_DWORD)ukey.GetLength()) { + copy_len = ukey.GetLength(); + } + FXSYS_memset(test, 0, sizeof(test)); + FXSYS_memset(tmpkey, 0, sizeof(tmpkey)); + FXSYS_memcpy(test, ukey.c_str(), copy_len); + for (int i = 19; i >= 0; i--) { + for (int j = 0; j < key_len; j++) { + tmpkey[j] = key[j] ^ i; + } + CRYPT_ArcFourCryptBlock(test, 32, tmpkey, key_len); + } + uint8_t md5[100]; + CRYPT_MD5Start(md5); + CRYPT_MD5Update(md5, defpasscode, 32); + CPDF_Array* pIdArray = m_pParser->GetIDArray(); + if (pIdArray) { + CFX_ByteString id = pIdArray->GetStringAt(0); + CRYPT_MD5Update(md5, (uint8_t*)id.c_str(), id.GetLength()); + } + CRYPT_MD5Finish(md5, ukeybuf); + return FXSYS_memcmp(test, ukeybuf, 16) == 0; + } + if (FXSYS_memcmp((void*)ukey.c_str(), ukeybuf, 16) == 0) { + return TRUE; + } + return FALSE; +} +CFX_ByteString CPDF_StandardSecurityHandler::GetUserPassword( + const uint8_t* owner_pass, + FX_DWORD pass_size, + int32_t key_len) { + CFX_ByteString okey = m_pEncryptDict->GetStringBy("O"); + uint8_t passcode[32]; + FX_DWORD i; + for (i = 0; i < 32; i++) { + passcode[i] = i < pass_size ? 
owner_pass[i] : defpasscode[i - pass_size]; + } + uint8_t digest[16]; + CRYPT_MD5Generate(passcode, 32, digest); + if (m_Revision >= 3) { + for (int i = 0; i < 50; i++) { + CRYPT_MD5Generate(digest, 16, digest); + } + } + uint8_t enckey[32]; + FXSYS_memset(enckey, 0, sizeof(enckey)); + FX_DWORD copy_len = key_len; + if (copy_len > sizeof(digest)) { + copy_len = sizeof(digest); + } + FXSYS_memcpy(enckey, digest, copy_len); + int okeylen = okey.GetLength(); + if (okeylen > 32) { + okeylen = 32; + } + uint8_t okeybuf[64]; + FXSYS_memset(okeybuf, 0, sizeof(okeybuf)); + FXSYS_memcpy(okeybuf, okey.c_str(), okeylen); + if (m_Revision == 2) { + CRYPT_ArcFourCryptBlock(okeybuf, okeylen, enckey, key_len); + } else { + for (int i = 19; i >= 0; i--) { + uint8_t tempkey[32]; + FXSYS_memset(tempkey, 0, sizeof(tempkey)); + for (int j = 0; j < m_KeyLen; j++) { + tempkey[j] = enckey[j] ^ i; + } + CRYPT_ArcFourCryptBlock(okeybuf, okeylen, tempkey, key_len); + } + } + int len = 32; + while (len && defpasscode[len - 1] == okeybuf[len - 1]) { + len--; + } + return CFX_ByteString(okeybuf, len); +} +FX_BOOL CPDF_StandardSecurityHandler::CheckOwnerPassword( + const uint8_t* password, + FX_DWORD pass_size, + uint8_t* key, + int32_t key_len) { + CFX_ByteString user_pass = GetUserPassword(password, pass_size, key_len); + if (CheckUserPassword(user_pass, user_pass.GetLength(), FALSE, key, + key_len)) { + return TRUE; + } + return CheckUserPassword(user_pass, user_pass.GetLength(), TRUE, key, + key_len); +} +FX_BOOL CPDF_StandardSecurityHandler::IsMetadataEncrypted() { + return m_pEncryptDict->GetBooleanBy("EncryptMetadata", TRUE); +} + +void CPDF_StandardSecurityHandler::OnCreate(CPDF_Dictionary* pEncryptDict, + CPDF_Array* pIdArray, + const uint8_t* user_pass, + FX_DWORD user_size, + const uint8_t* owner_pass, + FX_DWORD owner_size, + FX_BOOL bDefault, + FX_DWORD type) { + int cipher = 0, key_len = 0; + if (!LoadDict(pEncryptDict, type, cipher, key_len)) { + return; + } + if (bDefault && 
(!owner_pass || owner_size == 0)) { + owner_pass = user_pass; + owner_size = user_size; + } + if (m_Revision >= 5) { + int t = (int)time(NULL); + uint8_t sha[128]; + CRYPT_SHA256Start(sha); + CRYPT_SHA256Update(sha, (uint8_t*)&t, sizeof t); + CRYPT_SHA256Update(sha, m_EncryptKey, 32); + CRYPT_SHA256Update(sha, (uint8_t*)"there", 5); + CRYPT_SHA256Finish(sha, m_EncryptKey); + AES256_SetPassword(pEncryptDict, user_pass, user_size, FALSE, m_EncryptKey); + if (bDefault) { + AES256_SetPassword(pEncryptDict, owner_pass, owner_size, TRUE, + m_EncryptKey); + AES256_SetPerms(pEncryptDict, m_Permissions, + pEncryptDict->GetBooleanBy("EncryptMetadata", TRUE), + m_EncryptKey); + } + return; + } + if (bDefault) { + uint8_t passcode[32]; + FX_DWORD i; + for (i = 0; i < 32; i++) { + passcode[i] = + i < owner_size ? owner_pass[i] : defpasscode[i - owner_size]; + } + uint8_t digest[16]; + CRYPT_MD5Generate(passcode, 32, digest); + if (m_Revision >= 3) { + for (int i = 0; i < 50; i++) { + CRYPT_MD5Generate(digest, 16, digest); + } + } + uint8_t enckey[32]; + FXSYS_memcpy(enckey, digest, key_len); + for (i = 0; i < 32; i++) { + passcode[i] = i < user_size ? 
user_pass[i] : defpasscode[i - user_size]; + } + CRYPT_ArcFourCryptBlock(passcode, 32, enckey, key_len); + uint8_t tempkey[32]; + if (m_Revision >= 3) { + for (i = 1; i <= 19; i++) { + for (int j = 0; j < key_len; j++) { + tempkey[j] = enckey[j] ^ (uint8_t)i; + } + CRYPT_ArcFourCryptBlock(passcode, 32, tempkey, key_len); + } + } + pEncryptDict->SetAtString("O", CFX_ByteString(passcode, 32)); + } + CalcEncryptKey(m_pEncryptDict, (uint8_t*)user_pass, user_size, m_EncryptKey, + key_len, FALSE, pIdArray); + if (m_Revision < 3) { + uint8_t tempbuf[32]; + FXSYS_memcpy(tempbuf, defpasscode, 32); + CRYPT_ArcFourCryptBlock(tempbuf, 32, m_EncryptKey, key_len); + pEncryptDict->SetAtString("U", CFX_ByteString(tempbuf, 32)); + } else { + uint8_t md5[100]; + CRYPT_MD5Start(md5); + CRYPT_MD5Update(md5, defpasscode, 32); + if (pIdArray) { + CFX_ByteString id = pIdArray->GetStringAt(0); + CRYPT_MD5Update(md5, (uint8_t*)id.c_str(), id.GetLength()); + } + uint8_t digest[32]; + CRYPT_MD5Finish(md5, digest); + CRYPT_ArcFourCryptBlock(digest, 16, m_EncryptKey, key_len); + uint8_t tempkey[32]; + for (int i = 1; i <= 19; i++) { + for (int j = 0; j < key_len; j++) { + tempkey[j] = m_EncryptKey[j] ^ (uint8_t)i; + } + CRYPT_ArcFourCryptBlock(digest, 16, tempkey, key_len); + } + CRYPT_MD5Generate(digest, 16, digest + 16); + pEncryptDict->SetAtString("U", CFX_ByteString(digest, 32)); + } +} +void CPDF_StandardSecurityHandler::OnCreate(CPDF_Dictionary* pEncryptDict, + CPDF_Array* pIdArray, + const uint8_t* user_pass, + FX_DWORD user_size, + const uint8_t* owner_pass, + FX_DWORD owner_size, + FX_DWORD type) { + OnCreate(pEncryptDict, pIdArray, user_pass, user_size, owner_pass, owner_size, + TRUE, type); +} +void CPDF_StandardSecurityHandler::OnCreate(CPDF_Dictionary* pEncryptDict, + CPDF_Array* pIdArray, + const uint8_t* user_pass, + FX_DWORD user_size, + FX_DWORD type) { + OnCreate(pEncryptDict, pIdArray, user_pass, user_size, NULL, 0, FALSE, type); +} +void 
CPDF_StandardSecurityHandler::AES256_SetPassword( + CPDF_Dictionary* pEncryptDict, + const uint8_t* password, + FX_DWORD size, + FX_BOOL bOwner, + const uint8_t* key) { + uint8_t sha[128]; + CRYPT_SHA1Start(sha); + CRYPT_SHA1Update(sha, key, 32); + CRYPT_SHA1Update(sha, (uint8_t*)"hello", 5); + uint8_t digest[20]; + CRYPT_SHA1Finish(sha, digest); + CFX_ByteString ukey = pEncryptDict->GetStringBy("U"); + uint8_t digest1[48]; + if (m_Revision >= 6) { + Revision6_Hash(password, size, digest, + (bOwner ? (const uint8_t*)ukey : NULL), digest1); + } else { + CRYPT_SHA256Start(sha); + CRYPT_SHA256Update(sha, password, size); + CRYPT_SHA256Update(sha, digest, 8); + if (bOwner) { + CRYPT_SHA256Update(sha, ukey, ukey.GetLength()); + } + CRYPT_SHA256Finish(sha, digest1); + } + FXSYS_memcpy(digest1 + 32, digest, 16); + pEncryptDict->SetAtString(bOwner ? "O" : "U", CFX_ByteString(digest1, 48)); + if (m_Revision >= 6) { + Revision6_Hash(password, size, digest + 8, + (bOwner ? (const uint8_t*)ukey : NULL), digest1); + } else { + CRYPT_SHA256Start(sha); + CRYPT_SHA256Update(sha, password, size); + CRYPT_SHA256Update(sha, digest + 8, 8); + if (bOwner) { + CRYPT_SHA256Update(sha, ukey, ukey.GetLength()); + } + CRYPT_SHA256Finish(sha, digest1); + } + uint8_t* aes = FX_Alloc(uint8_t, 2048); + CRYPT_AESSetKey(aes, 16, digest1, 32, TRUE); + uint8_t iv[16]; + FXSYS_memset(iv, 0, 16); + CRYPT_AESSetIV(aes, iv); + CRYPT_AESEncrypt(aes, digest1, key, 32); + FX_Free(aes); + pEncryptDict->SetAtString(bOwner ? "OE" : "UE", CFX_ByteString(digest1, 32)); +} +void CPDF_StandardSecurityHandler::AES256_SetPerms( + CPDF_Dictionary* pEncryptDict, + FX_DWORD permissions, + FX_BOOL bEncryptMetadata, + const uint8_t* key) { + uint8_t buf[16]; + buf[0] = (uint8_t)permissions; + buf[1] = (uint8_t)(permissions >> 8); + buf[2] = (uint8_t)(permissions >> 16); + buf[3] = (uint8_t)(permissions >> 24); + buf[4] = 0xff; + buf[5] = 0xff; + buf[6] = 0xff; + buf[7] = 0xff; + buf[8] = bEncryptMetadata ? 
'T' : 'F'; + buf[9] = 'a'; + buf[10] = 'd'; + buf[11] = 'b'; + uint8_t* aes = FX_Alloc(uint8_t, 2048); + CRYPT_AESSetKey(aes, 16, key, 32, TRUE); + uint8_t iv[16], buf1[16]; + FXSYS_memset(iv, 0, 16); + CRYPT_AESSetIV(aes, iv); + CRYPT_AESEncrypt(aes, buf1, buf, 16); + FX_Free(aes); + pEncryptDict->SetAtString("Perms", CFX_ByteString(buf1, 16)); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_standard_security_handler.h b/core/fpdfapi/fpdf_parser/cpdf_standard_security_handler.h new file mode 100644 index 0000000000..3341ec391c --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_standard_security_handler.h @@ -0,0 +1,105 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FPDFAPI_FPDF_PARSER_CPDF_STANDARD_SECURITY_HANDLER_H_ +#define CORE_FPDFAPI_FPDF_PARSER_CPDF_STANDARD_SECURITY_HANDLER_H_ + +#include "core/include/fpdfapi/ipdf_security_handler.h" +#include "core/include/fxcrt/fx_string.h" +#include "core/include/fxcrt/fx_system.h" + +class CPDF_Array; + +#define PDF_ENCRYPT_CONTENT 0 + +class CPDF_StandardSecurityHandler : public IPDF_SecurityHandler { + public: + CPDF_StandardSecurityHandler(); + ~CPDF_StandardSecurityHandler() override; + + // IPDF_SecurityHandler: + FX_BOOL OnInit(CPDF_Parser* pParser, CPDF_Dictionary* pEncryptDict) override; + FX_DWORD GetPermissions() override; + FX_BOOL GetCryptInfo(int& cipher, + const uint8_t*& buffer, + int& keylen) override; + FX_BOOL IsMetadataEncrypted() override; + IPDF_CryptoHandler* CreateCryptoHandler() override; + + void OnCreate(CPDF_Dictionary* pEncryptDict, + CPDF_Array* pIdArray, + const uint8_t* user_pass, + FX_DWORD user_size, + const uint8_t* owner_pass, + FX_DWORD owner_size, + FX_DWORD type = PDF_ENCRYPT_CONTENT); + + void OnCreate(CPDF_Dictionary* pEncryptDict, + CPDF_Array* pIdArray, + const 
uint8_t* user_pass, + FX_DWORD user_size, + FX_DWORD type = PDF_ENCRYPT_CONTENT); + + CFX_ByteString GetUserPassword(const uint8_t* owner_pass, + FX_DWORD pass_size, + int32_t key_len); + int CheckPassword(const uint8_t* password, + FX_DWORD pass_size, + FX_BOOL bOwner, + uint8_t* key, + int key_len); + + private: + FX_BOOL LoadDict(CPDF_Dictionary* pEncryptDict); + FX_BOOL LoadDict(CPDF_Dictionary* pEncryptDict, + FX_DWORD type, + int& cipher, + int& key_len); + + FX_BOOL CheckUserPassword(const uint8_t* password, + FX_DWORD pass_size, + FX_BOOL bIgnoreEncryptMeta, + uint8_t* key, + int32_t key_len); + + FX_BOOL CheckOwnerPassword(const uint8_t* password, + FX_DWORD pass_size, + uint8_t* key, + int32_t key_len); + FX_BOOL AES256_CheckPassword(const uint8_t* password, + FX_DWORD size, + FX_BOOL bOwner, + uint8_t* key); + void AES256_SetPassword(CPDF_Dictionary* pEncryptDict, + const uint8_t* password, + FX_DWORD size, + FX_BOOL bOwner, + const uint8_t* key); + void AES256_SetPerms(CPDF_Dictionary* pEncryptDict, + FX_DWORD permission, + FX_BOOL bEncryptMetadata, + const uint8_t* key); + void OnCreate(CPDF_Dictionary* pEncryptDict, + CPDF_Array* pIdArray, + const uint8_t* user_pass, + FX_DWORD user_size, + const uint8_t* owner_pass, + FX_DWORD owner_size, + FX_BOOL bDefault, + FX_DWORD type); + FX_BOOL CheckSecurity(int32_t key_len); + + int m_Version; + int m_Revision; + CPDF_Parser* m_pParser; + CPDF_Dictionary* m_pEncryptDict; + FX_DWORD m_Permissions; + int m_Cipher; + uint8_t m_EncryptKey[32]; + int m_KeyLen; +}; + +#endif // CORE_FPDFAPI_FPDF_PARSER_CPDF_STANDARD_SECURITY_HANDLER_H_ diff --git a/core/fpdfapi/fpdf_parser/cpdf_stream.cpp b/core/fpdfapi/fpdf_parser/cpdf_stream.cpp new file mode 100644 index 0000000000..45c946ab5f --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_stream.cpp @@ -0,0 +1,232 @@ +// Copyright 2016 PDFium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_stream.h" + +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/fpdf_parser_decode.h" + +CPDF_Stream::CPDF_Stream(uint8_t* pData, FX_DWORD size, CPDF_Dictionary* pDict) + : m_pDict(pDict), + m_dwSize(size), + m_GenNum(kMemoryBasedGenNum), + m_pDataBuf(pData) {} + +CPDF_Stream::~CPDF_Stream() { + if (IsMemoryBased()) + FX_Free(m_pDataBuf); + + if (m_pDict) + m_pDict->Release(); +} + +CPDF_Object::Type CPDF_Stream::GetType() const { + return STREAM; +} + +CPDF_Dictionary* CPDF_Stream::GetDict() const { + return m_pDict; +} + +bool CPDF_Stream::IsStream() const { + return true; +} + +CPDF_Stream* CPDF_Stream::AsStream() { + return this; +} + +const CPDF_Stream* CPDF_Stream::AsStream() const { + return this; +} + +void CPDF_Stream::InitStreamInternal(CPDF_Dictionary* pDict) { + if (pDict) { + if (m_pDict) + m_pDict->Release(); + m_pDict = pDict; + } + if (IsMemoryBased()) + FX_Free(m_pDataBuf); + + m_GenNum = 0; + m_pFile = nullptr; +} + +void CPDF_Stream::InitStream(uint8_t* pData, + FX_DWORD size, + CPDF_Dictionary* pDict) { + InitStreamInternal(pDict); + m_GenNum = kMemoryBasedGenNum; + m_pDataBuf = FX_Alloc(uint8_t, size); + if (pData) + FXSYS_memcpy(m_pDataBuf, pData, size); + + m_dwSize = size; + if (m_pDict) + m_pDict->SetAtInteger("Length", size); +} + +CPDF_Object* CPDF_Stream::Clone(FX_BOOL bDirect) const { + CPDF_StreamAcc acc; + acc.LoadAllData(this, TRUE); + FX_DWORD streamSize = acc.GetSize(); + CPDF_Dictionary* pDict = GetDict(); + if (pDict) + pDict = ToDictionary(pDict->Clone(bDirect)); + + return new CPDF_Stream(acc.DetachData(), streamSize, pDict); +} + +void CPDF_Stream::SetData(const uint8_t* pData, + FX_DWORD size, + FX_BOOL bCompressed, + FX_BOOL bKeepBuf) { + if (IsMemoryBased()) + 
FX_Free(m_pDataBuf); + m_GenNum = kMemoryBasedGenNum; + + if (bKeepBuf) { + m_pDataBuf = const_cast<uint8_t*>(pData); + } else { + m_pDataBuf = FX_Alloc(uint8_t, size); + if (pData) { + FXSYS_memcpy(m_pDataBuf, pData, size); + } + } + m_dwSize = size; + if (!m_pDict) + m_pDict = new CPDF_Dictionary; + m_pDict->SetAtInteger("Length", size); + if (!bCompressed) { + m_pDict->RemoveAt("Filter"); + m_pDict->RemoveAt("DecodeParms"); + } +} + +FX_BOOL CPDF_Stream::ReadRawData(FX_FILESIZE offset, + uint8_t* buf, + FX_DWORD size) const { + if (!IsMemoryBased() && m_pFile) + return m_pFile->ReadBlock(buf, offset, size); + + if (m_pDataBuf) + FXSYS_memcpy(buf, m_pDataBuf + offset, size); + + return TRUE; +} + +void CPDF_Stream::InitStreamFromFile(IFX_FileRead* pFile, + CPDF_Dictionary* pDict) { + InitStreamInternal(pDict); + m_pFile = pFile; + m_dwSize = (FX_DWORD)pFile->GetSize(); + if (m_pDict) + m_pDict->SetAtInteger("Length", m_dwSize); +} + +CFX_WideString CPDF_Stream::GetUnicodeText() const { + CPDF_StreamAcc stream; + stream.LoadAllData(this, FALSE); + return PDF_DecodeText(stream.GetData(), stream.GetSize()); +} + +CPDF_StreamAcc::CPDF_StreamAcc() + : m_pData(nullptr), + m_dwSize(0), + m_bNewBuf(FALSE), + m_pImageParam(nullptr), + m_pStream(nullptr), + m_pSrcData(nullptr) {} + +void CPDF_StreamAcc::LoadAllData(const CPDF_Stream* pStream, + FX_BOOL bRawAccess, + FX_DWORD estimated_size, + FX_BOOL bImageAcc) { + if (!pStream) + return; + + m_pStream = pStream; + if (pStream->IsMemoryBased() && + (!pStream->GetDict()->KeyExist("Filter") || bRawAccess)) { + m_dwSize = pStream->GetRawSize(); + m_pData = pStream->GetRawData(); + return; + } + uint8_t* pSrcData; + FX_DWORD dwSrcSize = pStream->GetRawSize(); + if (dwSrcSize == 0) + return; + + if (!pStream->IsMemoryBased()) { + pSrcData = m_pSrcData = FX_Alloc(uint8_t, dwSrcSize); + if (!pStream->ReadRawData(0, pSrcData, dwSrcSize)) + return; + } else { + pSrcData = pStream->GetRawData(); + } + uint8_t* pDecryptedData = 
pSrcData; + FX_DWORD dwDecryptedSize = dwSrcSize; + if (!pStream->GetDict()->KeyExist("Filter") || bRawAccess) { + m_pData = pDecryptedData; + m_dwSize = dwDecryptedSize; + } else { + FX_BOOL bRet = PDF_DataDecode( + pDecryptedData, dwDecryptedSize, m_pStream->GetDict(), m_pData, + m_dwSize, m_ImageDecoder, m_pImageParam, estimated_size, bImageAcc); + if (!bRet) { + m_pData = pDecryptedData; + m_dwSize = dwDecryptedSize; + } + } + if (pSrcData != pStream->GetRawData() && pSrcData != m_pData) { + FX_Free(pSrcData); + } + if (pDecryptedData != pSrcData && pDecryptedData != m_pData) { + FX_Free(pDecryptedData); + } + m_pSrcData = nullptr; + m_bNewBuf = m_pData != pStream->GetRawData(); +} + +CPDF_StreamAcc::~CPDF_StreamAcc() { + if (m_bNewBuf) { + FX_Free(m_pData); + } + FX_Free(m_pSrcData); +} + +const uint8_t* CPDF_StreamAcc::GetData() const { + if (m_bNewBuf) { + return m_pData; + } + if (!m_pStream) { + return nullptr; + } + return m_pStream->GetRawData(); +} + +FX_DWORD CPDF_StreamAcc::GetSize() const { + if (m_bNewBuf) { + return m_dwSize; + } + if (!m_pStream) { + return 0; + } + return m_pStream->GetRawSize(); +} + +uint8_t* CPDF_StreamAcc::DetachData() { + if (m_bNewBuf) { + uint8_t* p = m_pData; + m_pData = nullptr; + m_dwSize = 0; + return p; + } + uint8_t* p = FX_Alloc(uint8_t, m_dwSize); + FXSYS_memcpy(p, m_pData, m_dwSize); + return p; +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_string.cpp b/core/fpdfapi/fpdf_parser/cpdf_string.cpp new file mode 100644 index 0000000000..1d0bf0b829 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_string.cpp @@ -0,0 +1,56 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_string.h" + +#include "core/include/fpdfapi/fpdf_parser_decode.h" + +CPDF_String::CPDF_String() : m_bHex(FALSE) {} + +CPDF_String::CPDF_String(const CFX_ByteString& str, FX_BOOL bHex) + : m_String(str), m_bHex(bHex) {} + +CPDF_String::CPDF_String(const CFX_WideString& str) : m_bHex(FALSE) { + m_String = PDF_EncodeText(str); +} + +CPDF_String::~CPDF_String() {} + +CPDF_Object::Type CPDF_String::GetType() const { + return STRING; +} + +CPDF_Object* CPDF_String::Clone(FX_BOOL bDirect) const { + return new CPDF_String(m_String, m_bHex); +} + +CFX_ByteString CPDF_String::GetString() const { + return m_String; +} + +CFX_ByteStringC CPDF_String::GetConstString() const { + return CFX_ByteStringC(m_String); +} + +void CPDF_String::SetString(const CFX_ByteString& str) { + m_String = str; +} + +bool CPDF_String::IsString() const { + return true; +} + +CPDF_String* CPDF_String::AsString() { + return this; +} + +const CPDF_String* CPDF_String::AsString() const { + return this; +} + +CFX_WideString CPDF_String::GetUnicodeText() const { + return PDF_DecodeText(m_String); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp b/core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp new file mode 100644 index 0000000000..62be48818b --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp @@ -0,0 +1,990 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" + +#include <vector> + +#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_boolean.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_name.h" +#include "core/include/fpdfapi/cpdf_null.h" +#include "core/include/fpdfapi/cpdf_number.h" +#include "core/include/fpdfapi/cpdf_reference.h" +#include "core/include/fpdfapi/cpdf_stream.h" +#include "core/include/fpdfapi/cpdf_string.h" +#include "core/include/fpdfapi/fpdf_module.h" +#include "core/include/fpdfapi/fpdf_parser_decode.h" +#include "core/include/fpdfapi/ipdf_crypto_handler.h" +#include "core/include/fxcrt/fx_ext.h" +#include "third_party/base/numerics/safe_math.h" + +namespace { + +struct SearchTagRecord { + const char* m_pTag; + FX_DWORD m_Len; + FX_DWORD m_Offset; +}; + +} // namespace + +// static +int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; + +CPDF_SyntaxParser::CPDF_SyntaxParser() + : m_MetadataObjnum(0), + m_pFileAccess(nullptr), + m_pFileBuf(nullptr), + m_BufSize(CPDF_ModuleMgr::kFileBufSize) {} + +CPDF_SyntaxParser::~CPDF_SyntaxParser() { + FX_Free(m_pFileBuf); +} + +FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { + CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); + m_Pos = pos; + return GetNextChar(ch); +} + +FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { + FX_FILESIZE pos = m_Pos + m_HeaderOffset; + if (pos >= m_FileLen) + return FALSE; + + if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { + FX_FILESIZE read_pos = pos; + FX_DWORD read_size = m_BufSize; + if ((FX_FILESIZE)read_size > m_FileLen) + read_size = (FX_DWORD)m_FileLen; + + if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { + if (m_FileLen < (FX_FILESIZE)read_size) { + read_pos = 0; + read_size = (FX_DWORD)m_FileLen; + } else { + read_pos = m_FileLen - read_size; + } 
+ } + + if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) + return FALSE; + + m_BufOffset = read_pos; + } + ch = m_pFileBuf[pos - m_BufOffset]; + m_Pos++; + return TRUE; +} + +FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { + pos += m_HeaderOffset; + if (pos >= m_FileLen) + return FALSE; + + if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { + FX_FILESIZE read_pos; + if (pos < (FX_FILESIZE)m_BufSize) + read_pos = 0; + else + read_pos = pos - m_BufSize + 1; + + FX_DWORD read_size = m_BufSize; + if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { + if (m_FileLen < (FX_FILESIZE)read_size) { + read_pos = 0; + read_size = (FX_DWORD)m_FileLen; + } else { + read_pos = m_FileLen - read_size; + } + } + + if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) + return FALSE; + + m_BufOffset = read_pos; + } + ch = m_pFileBuf[pos - m_BufOffset]; + return TRUE; +} + +FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) { + if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) + return FALSE; + m_Pos += size; + return TRUE; +} + +void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) { + m_WordSize = 0; + if (bIsNumber) + *bIsNumber = true; + + uint8_t ch; + if (!GetNextChar(ch)) + return; + + while (1) { + while (PDFCharIsWhitespace(ch)) { + if (!GetNextChar(ch)) + return; + } + + if (ch != '%') + break; + + while (1) { + if (!GetNextChar(ch)) + return; + if (PDFCharIsLineEnding(ch)) + break; + } + } + + if (PDFCharIsDelimiter(ch)) { + if (bIsNumber) + *bIsNumber = false; + + m_WordBuffer[m_WordSize++] = ch; + if (ch == '/') { + while (1) { + if (!GetNextChar(ch)) + return; + + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { + m_Pos--; + return; + } + + if (m_WordSize < sizeof(m_WordBuffer) - 1) + m_WordBuffer[m_WordSize++] = ch; + } + } else if (ch == '<') { + if (!GetNextChar(ch)) + return; + + if (ch == '<') + m_WordBuffer[m_WordSize++] = ch; + else + m_Pos--; 
+ } else if (ch == '>') { + if (!GetNextChar(ch)) + return; + + if (ch == '>') + m_WordBuffer[m_WordSize++] = ch; + else + m_Pos--; + } + return; + } + + while (1) { + if (m_WordSize < sizeof(m_WordBuffer) - 1) + m_WordBuffer[m_WordSize++] = ch; + + if (!PDFCharIsNumeric(ch)) { + if (bIsNumber) + *bIsNumber = false; + } + + if (!GetNextChar(ch)) + return; + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { + m_Pos--; + break; + } + } +} + +CFX_ByteString CPDF_SyntaxParser::ReadString() { + uint8_t ch; + if (!GetNextChar(ch)) + return CFX_ByteString(); + + CFX_ByteTextBuf buf; + int32_t parlevel = 0; + int32_t status = 0; + int32_t iEscCode = 0; + while (1) { + switch (status) { + case 0: + if (ch == ')') { + if (parlevel == 0) { + return buf.GetByteString(); + } + parlevel--; + buf.AppendChar(')'); + } else if (ch == '(') { + parlevel++; + buf.AppendChar('('); + } else if (ch == '\\') { + status = 1; + } else { + buf.AppendChar(ch); + } + break; + case 1: + if (ch >= '0' && ch <= '7') { + iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); + status = 2; + break; + } + + if (ch == 'n') { + buf.AppendChar('\n'); + } else if (ch == 'r') { + buf.AppendChar('\r'); + } else if (ch == 't') { + buf.AppendChar('\t'); + } else if (ch == 'b') { + buf.AppendChar('\b'); + } else if (ch == 'f') { + buf.AppendChar('\f'); + } else if (ch == '\r') { + status = 4; + break; + } else if (ch != '\n') { + buf.AppendChar(ch); + } + status = 0; + break; + case 2: + if (ch >= '0' && ch <= '7') { + iEscCode = + iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); + status = 3; + } else { + buf.AppendChar(iEscCode); + status = 0; + continue; + } + break; + case 3: + if (ch >= '0' && ch <= '7') { + iEscCode = + iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); + buf.AppendChar(iEscCode); + status = 0; + } else { + buf.AppendChar(iEscCode); + status = 0; + continue; + } + break; + case 4: + status = 0; + if (ch != '\n') + continue; + break; + } + + if 
(!GetNextChar(ch)) + break; + } + + GetNextChar(ch); + return buf.GetByteString(); +} + +CFX_ByteString CPDF_SyntaxParser::ReadHexString() { + uint8_t ch; + if (!GetNextChar(ch)) + return CFX_ByteString(); + + CFX_ByteTextBuf buf; + bool bFirst = true; + uint8_t code = 0; + while (1) { + if (ch == '>') + break; + + if (std::isxdigit(ch)) { + int val = FXSYS_toHexDigit(ch); + if (bFirst) { + code = val * 16; + } else { + code += val; + buf.AppendByte(code); + } + bFirst = !bFirst; + } + + if (!GetNextChar(ch)) + break; + } + if (!bFirst) + buf.AppendByte(code); + + return buf.GetByteString(); +} + +void CPDF_SyntaxParser::ToNextLine() { + uint8_t ch; + while (GetNextChar(ch)) { + if (ch == '\n') + break; + + if (ch == '\r') { + GetNextChar(ch); + if (ch != '\n') + --m_Pos; + break; + } + } +} + +void CPDF_SyntaxParser::ToNextWord() { + uint8_t ch; + if (!GetNextChar(ch)) + return; + + while (1) { + while (PDFCharIsWhitespace(ch)) { + if (!GetNextChar(ch)) + return; + } + + if (ch != '%') + break; + + while (1) { + if (!GetNextChar(ch)) + return; + if (PDFCharIsLineEnding(ch)) + break; + } + } + m_Pos--; +} + +CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) { + GetNextWordInternal(bIsNumber); + return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize); +} + +CFX_ByteString CPDF_SyntaxParser::GetKeyword() { + return GetNextWord(nullptr); +} + +CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList, + FX_DWORD objnum, + FX_DWORD gennum, + FX_BOOL bDecrypt) { + CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); + if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) + return nullptr; + + FX_FILESIZE SavedPos = m_Pos; + bool bIsNumber; + CFX_ByteString word = GetNextWord(&bIsNumber); + if (word.GetLength() == 0) + return nullptr; + + if (bIsNumber) { + FX_FILESIZE SavedPos = m_Pos; + CFX_ByteString nextword = GetNextWord(&bIsNumber); + if (bIsNumber) { + CFX_ByteString nextword2 = GetNextWord(nullptr); + if 
(nextword2 == "R") { + FX_DWORD objnum = FXSYS_atoui(word); + return new CPDF_Reference(pObjList, objnum); + } + } + m_Pos = SavedPos; + return new CPDF_Number(word); + } + + if (word == "true" || word == "false") + return new CPDF_Boolean(word == "true"); + + if (word == "null") + return new CPDF_Null; + + if (word == "(") { + CFX_ByteString str = ReadString(); + if (m_pCryptoHandler && bDecrypt) + m_pCryptoHandler->Decrypt(objnum, gennum, str); + return new CPDF_String(str, FALSE); + } + + if (word == "<") { + CFX_ByteString str = ReadHexString(); + if (m_pCryptoHandler && bDecrypt) + m_pCryptoHandler->Decrypt(objnum, gennum, str); + + return new CPDF_String(str, TRUE); + } + + if (word == "[") { + CPDF_Array* pArray = new CPDF_Array; + while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) + pArray->Add(pObj); + + return pArray; + } + + if (word[0] == '/') { + return new CPDF_Name( + PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); + } + + if (word == "<<") { + int32_t nKeys = 0; + FX_FILESIZE dwSignValuePos = 0; + + std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( + new CPDF_Dictionary); + while (1) { + CFX_ByteString key = GetNextWord(nullptr); + if (key.IsEmpty()) + return nullptr; + + FX_FILESIZE SavedPos = m_Pos - key.GetLength(); + if (key == ">>") + break; + + if (key == "endobj") { + m_Pos = SavedPos; + break; + } + + if (key[0] != '/') + continue; + + ++nKeys; + key = PDF_NameDecode(key); + if (key.IsEmpty()) + continue; + + if (key == "/Contents") + dwSignValuePos = m_Pos; + + CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true); + if (!pObj) + continue; + + CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1); + pDict->SetAt(keyNoSlash, pObj); + } + + // Only when this is a signature dictionary and has contents, we reset the + // contents to the un-decrypted form. 
+ if (pDict->IsSignatureDict() && dwSignValuePos) { + CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); + m_Pos = dwSignValuePos; + pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false)); + } + + FX_FILESIZE SavedPos = m_Pos; + CFX_ByteString nextword = GetNextWord(nullptr); + if (nextword != "stream") { + m_Pos = SavedPos; + return pDict.release(); + } + return ReadStream(pDict.release(), objnum, gennum); + } + + if (word == ">>") + m_Pos = SavedPos; + + return nullptr; +} + +CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict( + CPDF_IndirectObjectHolder* pObjList, + FX_DWORD objnum, + FX_DWORD gennum) { + CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); + if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) + return nullptr; + + FX_FILESIZE SavedPos = m_Pos; + bool bIsNumber; + CFX_ByteString word = GetNextWord(&bIsNumber); + if (word.GetLength() == 0) + return nullptr; + + if (bIsNumber) { + FX_FILESIZE SavedPos = m_Pos; + CFX_ByteString nextword = GetNextWord(&bIsNumber); + if (bIsNumber) { + CFX_ByteString nextword2 = GetNextWord(nullptr); + if (nextword2 == "R") + return new CPDF_Reference(pObjList, FXSYS_atoui(word)); + } + m_Pos = SavedPos; + return new CPDF_Number(word); + } + + if (word == "true" || word == "false") + return new CPDF_Boolean(word == "true"); + + if (word == "null") + return new CPDF_Null; + + if (word == "(") { + CFX_ByteString str = ReadString(); + if (m_pCryptoHandler) + m_pCryptoHandler->Decrypt(objnum, gennum, str); + return new CPDF_String(str, FALSE); + } + + if (word == "<") { + CFX_ByteString str = ReadHexString(); + if (m_pCryptoHandler) + m_pCryptoHandler->Decrypt(objnum, gennum, str); + return new CPDF_String(str, TRUE); + } + + if (word == "[") { + std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray( + new CPDF_Array); + while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) + pArray->Add(pObj); + + return m_WordBuffer[0] == ']' ? 
pArray.release() : nullptr; + } + + if (word[0] == '/') { + return new CPDF_Name( + PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); + } + + if (word == "<<") { + std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( + new CPDF_Dictionary); + while (1) { + FX_FILESIZE SavedPos = m_Pos; + CFX_ByteString key = GetNextWord(nullptr); + if (key.IsEmpty()) + return nullptr; + + if (key == ">>") + break; + + if (key == "endobj") { + m_Pos = SavedPos; + break; + } + + if (key[0] != '/') + continue; + + key = PDF_NameDecode(key); + std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj( + GetObject(pObjList, objnum, gennum, true)); + if (!obj) { + uint8_t ch; + while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) { + continue; + } + return nullptr; + } + + if (key.GetLength() > 1) { + pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1), + obj.release()); + } + } + + FX_FILESIZE SavedPos = m_Pos; + CFX_ByteString nextword = GetNextWord(nullptr); + if (nextword != "stream") { + m_Pos = SavedPos; + return pDict.release(); + } + + return ReadStream(pDict.release(), objnum, gennum); + } + + if (word == ">>") + m_Pos = SavedPos; + + return nullptr; +} + +unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { + unsigned char byte1 = 0; + unsigned char byte2 = 0; + + GetCharAt(pos, byte1); + GetCharAt(pos + 1, byte2); + + if (byte1 == '\r' && byte2 == '\n') + return 2; + + if (byte1 == '\r' || byte1 == '\n') + return 1; + + return 0; +} + +CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, + FX_DWORD objnum, + FX_DWORD gennum) { + CPDF_Object* pLenObj = pDict->GetElement("Length"); + FX_FILESIZE len = -1; + CPDF_Reference* pLenObjRef = ToReference(pLenObj); + + bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() && + pLenObjRef->GetRefObjNum() != objnum); + if (pLenObj && differingObjNum) + len = pLenObj->GetInteger(); + + // Locate the start of stream. 
+ ToNextLine(); + FX_FILESIZE streamStartPos = m_Pos; + + const CFX_ByteStringC kEndStreamStr("endstream"); + const CFX_ByteStringC kEndObjStr("endobj"); + + IPDF_CryptoHandler* pCryptoHandler = + objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get(); + if (!pCryptoHandler) { + FX_BOOL bSearchForKeyword = TRUE; + if (len >= 0) { + pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; + pos += len; + if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) + m_Pos = pos.ValueOrDie(); + + m_Pos += ReadEOLMarkers(m_Pos); + FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); + GetNextWordInternal(nullptr); + // Earlier version of PDF specification doesn't require EOL marker before + // 'endstream' keyword. If keyword 'endstream' follows the bytes in + // specified length, it signals the end of stream. + if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(), + kEndStreamStr.GetLength()) == 0) { + bSearchForKeyword = FALSE; + } + } + + if (bSearchForKeyword) { + // If len is not available, len needs to be calculated + // by searching the keywords "endstream" or "endobj". + m_Pos = streamStartPos; + FX_FILESIZE endStreamOffset = 0; + while (endStreamOffset >= 0) { + endStreamOffset = FindTag(kEndStreamStr, 0); + + // Can't find "endstream". + if (endStreamOffset < 0) + break; + + // Stop searching when "endstream" is found. + if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen, + kEndStreamStr, TRUE)) { + endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength(); + break; + } + } + + m_Pos = streamStartPos; + FX_FILESIZE endObjOffset = 0; + while (endObjOffset >= 0) { + endObjOffset = FindTag(kEndObjStr, 0); + + // Can't find "endobj". + if (endObjOffset < 0) + break; + + // Stop searching when "endobj" is found. + if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr, + TRUE)) { + endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength(); + break; + } + } + + // Can't find "endstream" or "endobj". 
+ if (endStreamOffset < 0 && endObjOffset < 0) { + pDict->Release(); + return nullptr; + } + + if (endStreamOffset < 0 && endObjOffset >= 0) { + // Correct the position of end stream. + endStreamOffset = endObjOffset; + } else if (endStreamOffset >= 0 && endObjOffset < 0) { + // Correct the position of end obj. + endObjOffset = endStreamOffset; + } else if (endStreamOffset > endObjOffset) { + endStreamOffset = endObjOffset; + } + + len = endStreamOffset; + int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); + if (numMarkers == 2) { + len -= 2; + } else { + numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); + if (numMarkers == 1) { + len -= 1; + } + } + + if (len < 0) { + pDict->Release(); + return nullptr; + } + pDict->SetAtInteger("Length", len); + } + m_Pos = streamStartPos; + } + + if (len < 0) { + pDict->Release(); + return nullptr; + } + + uint8_t* pData = nullptr; + if (len > 0) { + pData = FX_Alloc(uint8_t, len); + ReadBlock(pData, len); + if (pCryptoHandler) { + CFX_BinaryBuf dest_buf; + dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len)); + + void* context = pCryptoHandler->DecryptStart(objnum, gennum); + pCryptoHandler->DecryptStream(context, pData, len, dest_buf); + pCryptoHandler->DecryptFinish(context, dest_buf); + + FX_Free(pData); + pData = dest_buf.GetBuffer(); + len = dest_buf.GetSize(); + dest_buf.DetachBuffer(); + } + } + + CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict); + streamStartPos = m_Pos; + FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); + + GetNextWordInternal(nullptr); + + int numMarkers = ReadEOLMarkers(m_Pos); + if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) && + numMarkers != 0 && + FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) == + 0) { + m_Pos = streamStartPos; + } + return pStream; +} + +void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, + FX_DWORD HeaderOffset) { + FX_Free(m_pFileBuf); + + m_pFileBuf = 
FX_Alloc(uint8_t, m_BufSize); + m_HeaderOffset = HeaderOffset; + m_FileLen = pFileAccess->GetSize(); + m_Pos = 0; + m_pFileAccess = pFileAccess; + m_BufOffset = 0; + pFileAccess->ReadBlock( + m_pFileBuf, 0, + (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize)); +} + +uint32_t CPDF_SyntaxParser::GetDirectNum() { + bool bIsNumber; + GetNextWordInternal(&bIsNumber); + if (!bIsNumber) + return 0; + + m_WordBuffer[m_WordSize] = 0; + return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); +} + +bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, + FX_FILESIZE limit, + const CFX_ByteStringC& tag, + FX_BOOL checkKeyword) { + const FX_DWORD taglen = tag.GetLength(); + + bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); + bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && + !PDFCharIsWhitespace(tag[taglen - 1]); + + uint8_t ch; + if (bCheckRight && startpos + (int32_t)taglen <= limit && + GetCharAt(startpos + (int32_t)taglen, ch)) { + if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || + (checkKeyword && PDFCharIsDelimiter(ch))) { + return false; + } + } + + if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { + if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || + (checkKeyword && PDFCharIsDelimiter(ch))) { + return false; + } + } + return true; +} + +// TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards +// and drop the bool. 
+FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, + FX_BOOL bWholeWord, + FX_BOOL bForward, + FX_FILESIZE limit) { + int32_t taglen = tag.GetLength(); + if (taglen == 0) + return FALSE; + + FX_FILESIZE pos = m_Pos; + int32_t offset = 0; + if (!bForward) + offset = taglen - 1; + + const uint8_t* tag_data = tag.GetPtr(); + uint8_t byte; + while (1) { + if (bForward) { + if (limit && pos >= m_Pos + limit) + return FALSE; + + if (!GetCharAt(pos, byte)) + return FALSE; + + } else { + if (limit && pos <= m_Pos - limit) + return FALSE; + + if (!GetCharAtBackward(pos, byte)) + return FALSE; + } + + if (byte == tag_data[offset]) { + if (bForward) { + offset++; + if (offset < taglen) { + pos++; + continue; + } + } else { + offset--; + if (offset >= 0) { + pos--; + continue; + } + } + + FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; + if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) { + m_Pos = startpos; + return TRUE; + } + } + + if (bForward) { + offset = byte == tag_data[0] ? 1 : 0; + pos++; + } else { + offset = byte == tag_data[taglen - 1] ? 
taglen - 2 : taglen - 1; + pos--; + } + + if (pos < 0) + return FALSE; + } + + return FALSE; +} + +int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, + FX_BOOL bWholeWord, + FX_FILESIZE limit) { + int32_t ntags = 1; + for (int i = 0; i < tags.GetLength(); ++i) { + if (tags[i] == 0) + ++ntags; + } + + std::vector<SearchTagRecord> patterns(ntags); + FX_DWORD start = 0; + FX_DWORD itag = 0; + FX_DWORD max_len = 0; + for (int i = 0; i <= tags.GetLength(); ++i) { + if (tags[i] == 0) { + FX_DWORD len = i - start; + max_len = std::max(len, max_len); + patterns[itag].m_pTag = tags.GetCStr() + start; + patterns[itag].m_Len = len; + patterns[itag].m_Offset = 0; + start = i + 1; + ++itag; + } + } + + const FX_FILESIZE pos_limit = m_Pos + limit; + for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) { + uint8_t byte; + if (!GetCharAt(pos, byte)) + break; + + for (int i = 0; i < ntags; ++i) { + SearchTagRecord& pat = patterns[i]; + if (pat.m_pTag[pat.m_Offset] != byte) { + pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; + continue; + } + + ++pat.m_Offset; + if (pat.m_Offset != pat.m_Len) + continue; + + if (!bWholeWord || + IsWholeWord(pos - pat.m_Len, limit, + CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) { + return i; + } + + pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; + } + } + return -1; +} + +FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, + FX_FILESIZE limit) { + int32_t taglen = tag.GetLength(); + int32_t match = 0; + limit += m_Pos; + FX_FILESIZE startpos = m_Pos; + + while (1) { + uint8_t ch; + if (!GetNextChar(ch)) + return -1; + + if (ch == tag[match]) { + match++; + if (match == taglen) + return m_Pos - startpos - taglen; + } else { + match = ch == tag[0] ? 
1 : 0; + } + + if (limit && m_Pos == limit) + return -1; + } + return -1; +} + +void CPDF_SyntaxParser::SetEncrypt( + std::unique_ptr<IPDF_CryptoHandler> pCryptoHandler) { + m_pCryptoHandler = std::move(pCryptoHandler); +} diff --git a/core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h b/core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h new file mode 100644 index 0000000000..842b35f8f2 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h @@ -0,0 +1,96 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FPDFAPI_FPDF_PARSER_CPDF_SYNTAX_PARSER_H_ +#define CORE_FPDFAPI_FPDF_PARSER_CPDF_SYNTAX_PARSER_H_ + +#include <memory> + +#include "core/include/fxcrt/fx_basic.h" + +class CPDF_Dictionary; +class CPDF_IndirectObjectHolder; +class CPDF_Object; +class CPDF_Stream; +class IFX_FileRead; +class IPDF_CryptoHandler; + +class CPDF_SyntaxParser { + public: + CPDF_SyntaxParser(); + ~CPDF_SyntaxParser(); + + void InitParser(IFX_FileRead* pFileAccess, FX_DWORD HeaderOffset); + + FX_FILESIZE SavePos() const { return m_Pos; } + void RestorePos(FX_FILESIZE pos) { m_Pos = pos; } + + CPDF_Object* GetObject(CPDF_IndirectObjectHolder* pObjList, + FX_DWORD objnum, + FX_DWORD gennum, + FX_BOOL bDecrypt); + CPDF_Object* GetObjectByStrict(CPDF_IndirectObjectHolder* pObjList, + FX_DWORD objnum, + FX_DWORD gennum); + CFX_ByteString GetKeyword(); + + void ToNextLine(); + void ToNextWord(); + + FX_BOOL SearchWord(const CFX_ByteStringC& word, + FX_BOOL bWholeWord, + FX_BOOL bForward, + FX_FILESIZE limit); + int SearchMultiWord(const CFX_ByteStringC& words, + FX_BOOL bWholeWord, + FX_FILESIZE limit); + FX_FILESIZE FindTag(const CFX_ByteStringC& tag, FX_FILESIZE limit); + + void SetEncrypt(std::unique_ptr<IPDF_CryptoHandler> pCryptoHandler); + + FX_BOOL ReadBlock(uint8_t* pBuf, 
FX_DWORD size); + FX_BOOL GetCharAt(FX_FILESIZE pos, uint8_t& ch); + CFX_ByteString GetNextWord(bool* bIsNumber); + + private: + friend class CPDF_Parser; + friend class CPDF_DataAvail; + friend class cpdf_syntax_parser_ReadHexString_Test; + + static const int kParserMaxRecursionDepth = 64; + static int s_CurrentRecursionDepth; + + uint32_t GetDirectNum(); + + FX_BOOL GetNextChar(uint8_t& ch); + FX_BOOL GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch); + void GetNextWordInternal(bool* bIsNumber); + bool IsWholeWord(FX_FILESIZE startpos, + FX_FILESIZE limit, + const CFX_ByteStringC& tag, + FX_BOOL checkKeyword); + + CFX_ByteString ReadString(); + CFX_ByteString ReadHexString(); + unsigned int ReadEOLMarkers(FX_FILESIZE pos); + CPDF_Stream* ReadStream(CPDF_Dictionary* pDict, + FX_DWORD objnum, + FX_DWORD gennum); + + FX_FILESIZE m_Pos; + int m_MetadataObjnum; + IFX_FileRead* m_pFileAccess; + FX_DWORD m_HeaderOffset; + FX_FILESIZE m_FileLen; + uint8_t* m_pFileBuf; + FX_DWORD m_BufSize; + FX_FILESIZE m_BufOffset; + std::unique_ptr<IPDF_CryptoHandler> m_pCryptoHandler; + uint8_t m_WordBuffer[257]; + FX_DWORD m_WordSize; +}; + +#endif // CORE_FPDFAPI_FPDF_PARSER_CPDF_SYNTAX_PARSER_H_ diff --git a/core/fpdfapi/fpdf_parser/cpdf_syntax_parser_unittest.cpp b/core/fpdfapi/fpdf_parser/cpdf_syntax_parser_unittest.cpp new file mode 100644 index 0000000000..f501c01715 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/cpdf_syntax_parser_unittest.cpp @@ -0,0 +1,171 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <limits> +#include <string> + +#include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" +#include "core/include/fpdfapi/cpdf_parser.h" +#include "core/include/fxcrt/fx_ext.h" +#include "core/include/fxcrt/fx_stream.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/utils/path_service.h" + +TEST(cpdf_syntax_parser, ReadHexString) { + { + // Empty string. + uint8_t data[] = ""; + ScopedFileStream stream(FX_CreateMemoryStream(data, 0, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("", parser.ReadHexString()); + EXPECT_EQ(0, parser.SavePos()); + } + + { + // Blank string. + uint8_t data[] = " "; + ScopedFileStream stream(FX_CreateMemoryStream(data, 2, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("", parser.ReadHexString()); + EXPECT_EQ(2, parser.SavePos()); + } + + { + // Skips unknown characters. + uint8_t data[] = "z12b"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 4, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("\x12\xb0", parser.ReadHexString()); + EXPECT_EQ(4, parser.SavePos()); + } + + { + // Skips unknown characters. + uint8_t data[] = "*<&*#$^&@1"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 10, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("\x10", parser.ReadHexString()); + EXPECT_EQ(10, parser.SavePos()); + } + + { + // Skips unknown characters. + uint8_t data[] = "\x80zab"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 4, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("\xab", parser.ReadHexString()); + EXPECT_EQ(4, parser.SavePos()); + } + + { + // Skips unknown characters. 
+ uint8_t data[] = "\xffzab"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 4, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("\xab", parser.ReadHexString()); + EXPECT_EQ(4, parser.SavePos()); + } + + { + // Regular conversion. + uint8_t data[] = "1A2b>abcd"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 9, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("\x1a\x2b", parser.ReadHexString()); + EXPECT_EQ(5, parser.SavePos()); + } + + { + // Position out of bounds. + uint8_t data[] = "12ab>"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 5, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + parser.RestorePos(5); + EXPECT_EQ("", parser.ReadHexString()); + + parser.RestorePos(6); + EXPECT_EQ("", parser.ReadHexString()); + + parser.RestorePos(-1); + EXPECT_EQ("", parser.ReadHexString()); + + parser.RestorePos(std::numeric_limits<FX_FILESIZE>::max()); + EXPECT_EQ("", parser.ReadHexString()); + + // Check string still parses when set to 0. + parser.RestorePos(0); + EXPECT_EQ("\x12\xab", parser.ReadHexString()); + } + + { + // Missing ending >. + uint8_t data[] = "1A2b"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 4, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("\x1a\x2b", parser.ReadHexString()); + EXPECT_EQ(4, parser.SavePos()); + } + + { + // Missing ending >. + uint8_t data[] = "12abz"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 5, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("\x12\xab", parser.ReadHexString()); + EXPECT_EQ(5, parser.SavePos()); + } + + { + // Uneven number of bytes. 
+ uint8_t data[] = "1A2>asdf"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 8, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("\x1a\x20", parser.ReadHexString()); + EXPECT_EQ(4, parser.SavePos()); + } + + { + // Uneven number of bytes. + uint8_t data[] = "1A2zasdf"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 8, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("\x1a\x2a\xdf", parser.ReadHexString()); + EXPECT_EQ(8, parser.SavePos()); + } + + { + // Just ending character. + uint8_t data[] = ">"; + ScopedFileStream stream(FX_CreateMemoryStream(data, 1, FALSE)); + + CPDF_SyntaxParser parser; + parser.InitParser(stream.get(), 0); + EXPECT_EQ("", parser.ReadHexString()); + EXPECT_EQ(1, parser.SavePos()); + } +} diff --git a/core/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp b/core/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp new file mode 100644 index 0000000000..eeb365d0e6 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp @@ -0,0 +1,592 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/include/fpdfapi/fpdf_parser_decode.h" + +#include <limits.h> +#include <vector> + +#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/fpdf_module.h" +#include "core/include/fxcodec/fx_codec.h" +#include "core/include/fxcrt/fx_ext.h" +#include "third_party/base/stl_util.h" + +#define _STREAM_MAX_SIZE_ 20 * 1024 * 1024 + +const FX_WORD PDFDocEncoding[256] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, + 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x02d8, 0x02c7, 0x02c6, + 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc, 0x0020, 0x0021, 0x0022, 0x0023, + 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, + 0x002d, 0x002e, 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, + 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, + 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, + 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, + 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, + 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, + 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, + 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, + 0x007e, 0x0000, 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, + 0x2044, 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018, + 0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, 0x0178, + 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000, 0x20ac, 0x00a1, + 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, + 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af, 0x00b0, 
0x00b1, 0x00b2, 0x00b3, + 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, + 0x00bd, 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, + 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, + 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 0x00e0, + 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, + 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2, + 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, + 0x00fc, 0x00fd, 0x00fe, 0x00ff}; + +FX_DWORD A85Decode(const uint8_t* src_buf, + FX_DWORD src_size, + uint8_t*& dest_buf, + FX_DWORD& dest_size) { + dest_size = 0; + dest_buf = nullptr; + if (src_size == 0) + return 0; + + // Count legal characters and zeros. + FX_DWORD zcount = 0; + FX_DWORD pos = 0; + while (pos < src_size) { + uint8_t ch = src_buf[pos]; + if (ch == 'z') { + zcount++; + } else if ((ch < '!' || ch > 'u') && !PDFCharIsLineEnding(ch) && + ch != ' ' && ch != '\t') { + break; + } + pos++; + } + // No content to decode. + if (pos == 0) + return 0; + + // Count the space needed to contain non-zero characters. The encoding ratio + // of Ascii85 is 4:5. + FX_DWORD space_for_non_zeroes = (pos - zcount) / 5 * 4 + 4; + if (zcount > (UINT_MAX - space_for_non_zeroes) / 4) { + return (FX_DWORD)-1; + } + dest_buf = FX_Alloc(uint8_t, zcount * 4 + space_for_non_zeroes); + size_t state = 0; + uint32_t res = 0; + pos = dest_size = 0; + while (pos < src_size) { + uint8_t ch = src_buf[pos++]; + if (PDFCharIsLineEnding(ch) || ch == ' ' || ch == '\t') + continue; + + if (ch == 'z') { + FXSYS_memset(dest_buf + dest_size, 0, 4); + state = 0; + res = 0; + dest_size += 4; + } else if (ch >= '!' 
&& ch <= 'u') { + res = res * 85 + ch - 33; + state++; + if (state == 5) { + for (size_t i = 0; i < 4; i++) { + dest_buf[dest_size++] = (uint8_t)(res >> (3 - i) * 8); + } + state = 0; + res = 0; + } + } else { + // The end or illegal character. + break; + } + } + // Handle partial group. + if (state) { + for (size_t i = state; i < 5; i++) + res = res * 85 + 84; + for (size_t i = 0; i < state - 1; i++) + dest_buf[dest_size++] = (uint8_t)(res >> (3 - i) * 8); + } + if (pos < src_size && src_buf[pos] == '>') + pos++; + return pos; +} + +FX_DWORD HexDecode(const uint8_t* src_buf, + FX_DWORD src_size, + uint8_t*& dest_buf, + FX_DWORD& dest_size) { + dest_size = 0; + if (src_size == 0) { + dest_buf = nullptr; + return 0; + } + + FX_DWORD i = 0; + // Find the end of data. + while (i < src_size && src_buf[i] != '>') + i++; + + dest_buf = FX_Alloc(uint8_t, i / 2 + 1); + bool bFirst = true; + for (i = 0; i < src_size; i++) { + uint8_t ch = src_buf[i]; + if (PDFCharIsLineEnding(ch) || ch == ' ' || ch == '\t') + continue; + + if (ch == '>') { + ++i; + break; + } + if (!std::isxdigit(ch)) + continue; + + int digit = FXSYS_toHexDigit(ch); + if (bFirst) + dest_buf[dest_size] = digit * 16; + else + dest_buf[dest_size++] += digit; + + bFirst = !bFirst; + } + if (!bFirst) + dest_size++; + return i; +} + +FX_DWORD RunLengthDecode(const uint8_t* src_buf, + FX_DWORD src_size, + uint8_t*& dest_buf, + FX_DWORD& dest_size) { + FX_DWORD i = 0; + FX_DWORD old; + dest_size = 0; + while (i < src_size) { + if (src_buf[i] < 128) { + old = dest_size; + dest_size += src_buf[i] + 1; + if (dest_size < old) { + return (FX_DWORD)-1; + } + i += src_buf[i] + 2; + } else if (src_buf[i] > 128) { + old = dest_size; + dest_size += 257 - src_buf[i]; + if (dest_size < old) { + return (FX_DWORD)-1; + } + i += 2; + } else { + break; + } + } + if (dest_size >= _STREAM_MAX_SIZE_) { + return -1; + } + dest_buf = FX_Alloc(uint8_t, dest_size); + i = 0; + int dest_count = 0; + while (i < src_size) { + if (src_buf[i] 
< 128) { + FX_DWORD copy_len = src_buf[i] + 1; + FX_DWORD buf_left = src_size - i - 1; + if (buf_left < copy_len) { + FX_DWORD delta = copy_len - buf_left; + copy_len = buf_left; + FXSYS_memset(dest_buf + dest_count + copy_len, '\0', delta); + } + FXSYS_memcpy(dest_buf + dest_count, src_buf + i + 1, copy_len); + dest_count += src_buf[i] + 1; + i += src_buf[i] + 2; + } else if (src_buf[i] > 128) { + int fill = 0; + if (i < src_size - 1) { + fill = src_buf[i + 1]; + } + FXSYS_memset(dest_buf + dest_count, fill, 257 - src_buf[i]); + dest_count += 257 - src_buf[i]; + i += 2; + } else { + break; + } + } + FX_DWORD ret = i + 1; + if (ret > src_size) { + ret = src_size; + } + return ret; +} + +ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder( + const uint8_t* src_buf, + FX_DWORD src_size, + int width, + int height, + const CPDF_Dictionary* pParams) { + int K = 0; + FX_BOOL EndOfLine = FALSE; + FX_BOOL ByteAlign = FALSE; + FX_BOOL BlackIs1 = FALSE; + int Columns = 1728; + int Rows = 0; + if (pParams) { + K = pParams->GetIntegerBy("K"); + EndOfLine = pParams->GetIntegerBy("EndOfLine"); + ByteAlign = pParams->GetIntegerBy("EncodedByteAlign"); + BlackIs1 = pParams->GetIntegerBy("BlackIs1"); + Columns = pParams->GetIntegerBy("Columns", 1728); + Rows = pParams->GetIntegerBy("Rows"); + if (Rows > USHRT_MAX) { + Rows = 0; + } + if (Columns <= 0 || Rows < 0 || Columns > USHRT_MAX || Rows > USHRT_MAX) { + return nullptr; + } + } + return CPDF_ModuleMgr::Get()->GetFaxModule()->CreateDecoder( + src_buf, src_size, width, height, K, EndOfLine, ByteAlign, BlackIs1, + Columns, Rows); +} + +static FX_BOOL CheckFlateDecodeParams(int Colors, + int BitsPerComponent, + int Columns) { + if (Columns < 0) { + return FALSE; + } + int check = Columns; + if (Colors < 0 || (check > 0 && Colors > INT_MAX / check)) { + return FALSE; + } + check *= Colors; + if (BitsPerComponent < 0 || + (check > 0 && BitsPerComponent > INT_MAX / check)) { + return FALSE; + } + check *= BitsPerComponent; + if (check > 
INT_MAX - 7) { + return FALSE; + } + return TRUE; +} + +ICodec_ScanlineDecoder* FPDFAPI_CreateFlateDecoder( + const uint8_t* src_buf, + FX_DWORD src_size, + int width, + int height, + int nComps, + int bpc, + const CPDF_Dictionary* pParams) { + int predictor = 0; + int Colors = 0, BitsPerComponent = 0, Columns = 0; + if (pParams) { + predictor = pParams->GetIntegerBy("Predictor"); + Colors = pParams->GetIntegerBy("Colors", 1); + BitsPerComponent = pParams->GetIntegerBy("BitsPerComponent", 8); + Columns = pParams->GetIntegerBy("Columns", 1); + if (!CheckFlateDecodeParams(Colors, BitsPerComponent, Columns)) { + return nullptr; + } + } + return CPDF_ModuleMgr::Get()->GetFlateModule()->CreateDecoder( + src_buf, src_size, width, height, nComps, bpc, predictor, Colors, + BitsPerComponent, Columns); +} + +FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW, + const uint8_t* src_buf, + FX_DWORD src_size, + CPDF_Dictionary* pParams, + FX_DWORD estimated_size, + uint8_t*& dest_buf, + FX_DWORD& dest_size) { + int predictor = 0; + FX_BOOL bEarlyChange = TRUE; + int Colors = 0, BitsPerComponent = 0, Columns = 0; + if (pParams) { + predictor = pParams->GetIntegerBy("Predictor"); + bEarlyChange = pParams->GetIntegerBy("EarlyChange", 1); + Colors = pParams->GetIntegerBy("Colors", 1); + BitsPerComponent = pParams->GetIntegerBy("BitsPerComponent", 8); + Columns = pParams->GetIntegerBy("Columns", 1); + if (!CheckFlateDecodeParams(Colors, BitsPerComponent, Columns)) { + return (FX_DWORD)-1; + } + } + return CPDF_ModuleMgr::Get()->GetFlateModule()->FlateOrLZWDecode( + bLZW, src_buf, src_size, bEarlyChange, predictor, Colors, + BitsPerComponent, Columns, estimated_size, dest_buf, dest_size); +} + +FX_BOOL PDF_DataDecode(const uint8_t* src_buf, + FX_DWORD src_size, + const CPDF_Dictionary* pDict, + uint8_t*& dest_buf, + FX_DWORD& dest_size, + CFX_ByteString& ImageEncoding, + CPDF_Dictionary*& pImageParms, + FX_DWORD last_estimated_size, + FX_BOOL bImageAcc) { + CPDF_Object* pDecoder = pDict ? 
pDict->GetElementValue("Filter") : nullptr; + if (!pDecoder || (!pDecoder->IsArray() && !pDecoder->IsName())) + return FALSE; + + CPDF_Object* pParams = + pDict ? pDict->GetElementValue("DecodeParms") : nullptr; + std::vector<CFX_ByteString> DecoderList; + CFX_ArrayTemplate<CPDF_Object*> ParamList; + if (CPDF_Array* pDecoders = pDecoder->AsArray()) { + CPDF_Array* pParamsArray = ToArray(pParams); + if (!pParamsArray) + pParams = nullptr; + + for (FX_DWORD i = 0; i < pDecoders->GetCount(); i++) { + DecoderList.push_back(pDecoders->GetConstStringAt(i)); + ParamList.Add(pParams ? pParamsArray->GetDictAt(i) : nullptr); + } + } else { + DecoderList.push_back(pDecoder->GetConstString()); + ParamList.Add(pParams ? pParams->GetDict() : nullptr); + } + uint8_t* last_buf = (uint8_t*)src_buf; + FX_DWORD last_size = src_size; + int nSize = pdfium::CollectionSize<int>(DecoderList); + for (int i = 0; i < nSize; i++) { + int estimated_size = i == nSize - 1 ? last_estimated_size : 0; + CFX_ByteString decoder = DecoderList[i]; + // Use ToDictionary here because we can push nullptr into the ParamList. 
+ CPDF_Dictionary* pParam = ToDictionary(ParamList[i]); + uint8_t* new_buf = nullptr; + FX_DWORD new_size = (FX_DWORD)-1; + int offset = -1; + if (decoder == "FlateDecode" || decoder == "Fl") { + if (bImageAcc && i == nSize - 1) { + ImageEncoding = "FlateDecode"; + dest_buf = (uint8_t*)last_buf; + dest_size = last_size; + pImageParms = pParam; + return TRUE; + } + offset = FPDFAPI_FlateOrLZWDecode(FALSE, last_buf, last_size, pParam, + estimated_size, new_buf, new_size); + } else if (decoder == "LZWDecode" || decoder == "LZW") { + offset = FPDFAPI_FlateOrLZWDecode(TRUE, last_buf, last_size, pParam, + estimated_size, new_buf, new_size); + } else if (decoder == "ASCII85Decode" || decoder == "A85") { + offset = A85Decode(last_buf, last_size, new_buf, new_size); + } else if (decoder == "ASCIIHexDecode" || decoder == "AHx") { + offset = HexDecode(last_buf, last_size, new_buf, new_size); + } else if (decoder == "RunLengthDecode" || decoder == "RL") { + if (bImageAcc && i == nSize - 1) { + ImageEncoding = "RunLengthDecode"; + dest_buf = (uint8_t*)last_buf; + dest_size = last_size; + pImageParms = pParam; + return TRUE; + } + offset = RunLengthDecode(last_buf, last_size, new_buf, new_size); + } else if (decoder == "Crypt") { + continue; + } else { + // If we get here, assume it's an image decoder. 
+ if (decoder == "DCT") { + decoder = "DCTDecode"; + } else if (decoder == "CCF") { + decoder = "CCITTFaxDecode"; + } + ImageEncoding = decoder; + pImageParms = pParam; + dest_buf = (uint8_t*)last_buf; + dest_size = last_size; + if (CPDF_Array* pDecoders = pDecoder->AsArray()) + pDecoders->RemoveAt(i + 1, pDecoders->GetCount() - i - 1); + return TRUE; + } + if (last_buf != src_buf) { + FX_Free(last_buf); + } + if (offset == -1) { + FX_Free(new_buf); + return FALSE; + } + last_buf = new_buf; + last_size = new_size; + } + ImageEncoding = ""; + pImageParms = nullptr; + dest_buf = last_buf; + dest_size = last_size; + return TRUE; +} + +CFX_WideString PDF_DecodeText(const uint8_t* src_data, FX_DWORD src_len) { + CFX_WideString result; + if (src_len >= 2 && ((src_data[0] == 0xfe && src_data[1] == 0xff) || + (src_data[0] == 0xff && src_data[1] == 0xfe))) { + bool bBE = src_data[0] == 0xfe; + FX_DWORD max_chars = (src_len - 2) / 2; + if (!max_chars) { + return result; + } + if (src_data[0] == 0xff) { + bBE = !src_data[2]; + } + FX_WCHAR* dest_buf = result.GetBuffer(max_chars); + const uint8_t* uni_str = src_data + 2; + int dest_pos = 0; + for (FX_DWORD i = 0; i < max_chars * 2; i += 2) { + FX_WORD unicode = bBE ? (uni_str[i] << 8 | uni_str[i + 1]) + : (uni_str[i + 1] << 8 | uni_str[i]); + if (unicode == 0x1b) { + i += 2; + while (i < max_chars * 2) { + FX_WORD unicode = bBE ? 
(uni_str[i] << 8 | uni_str[i + 1]) + : (uni_str[i + 1] << 8 | uni_str[i]); + i += 2; + if (unicode == 0x1b) { + break; + } + } + } else { + dest_buf[dest_pos++] = unicode; + } + } + result.ReleaseBuffer(dest_pos); + } else { + FX_WCHAR* dest_buf = result.GetBuffer(src_len); + for (FX_DWORD i = 0; i < src_len; i++) + dest_buf[i] = PDFDocEncoding[src_data[i]]; + result.ReleaseBuffer(src_len); + } + return result; +} + +CFX_WideString PDF_DecodeText(const CFX_ByteString& bstr) { + return PDF_DecodeText((const uint8_t*)bstr.c_str(), bstr.GetLength()); +} + +CFX_ByteString PDF_EncodeText(const FX_WCHAR* pString, int len) { + if (len == -1) { + len = FXSYS_wcslen(pString); + } + CFX_ByteString result; + FX_CHAR* dest_buf1 = result.GetBuffer(len); + int i; + for (i = 0; i < len; i++) { + int code; + for (code = 0; code < 256; code++) + if (PDFDocEncoding[code] == pString[i]) { + break; + } + if (code == 256) { + break; + } + dest_buf1[i] = code; + } + result.ReleaseBuffer(i); + if (i == len) { + return result; + } + + if (len > INT_MAX / 2 - 1) { + result.ReleaseBuffer(0); + return result; + } + + int encLen = len * 2 + 2; + + uint8_t* dest_buf2 = (uint8_t*)result.GetBuffer(encLen); + dest_buf2[0] = 0xfe; + dest_buf2[1] = 0xff; + dest_buf2 += 2; + for (int i = 0; i < len; i++) { + *dest_buf2++ = pString[i] >> 8; + *dest_buf2++ = (uint8_t)pString[i]; + } + result.ReleaseBuffer(encLen); + return result; +} + +CFX_ByteString PDF_EncodeText(const CFX_WideString& str) { + return PDF_EncodeText(str.c_str(), str.GetLength()); +} + +CFX_ByteString PDF_EncodeString(const CFX_ByteString& src, FX_BOOL bHex) { + CFX_ByteTextBuf result; + int srclen = src.GetLength(); + if (bHex) { + result.AppendChar('<'); + for (int i = 0; i < srclen; i++) { + result.AppendChar("0123456789ABCDEF"[src[i] / 16]); + result.AppendChar("0123456789ABCDEF"[src[i] % 16]); + } + result.AppendChar('>'); + return result.GetByteString(); + } + result.AppendChar('('); + for (int i = 0; i < srclen; i++) { + 
uint8_t ch = src[i]; + if (ch == ')' || ch == '\\' || ch == '(') { + result.AppendChar('\\'); + } else if (ch == 0x0a) { + result << "\\n"; + continue; + } else if (ch == 0x0d) { + result << "\\r"; + continue; + } + result.AppendChar(ch); + } + result.AppendChar(')'); + return result.GetByteString(); +} + +void FlateEncode(const uint8_t* src_buf, + FX_DWORD src_size, + uint8_t*& dest_buf, + FX_DWORD& dest_size) { + CCodec_ModuleMgr* pEncoders = CPDF_ModuleMgr::Get()->GetCodecModule(); + if (pEncoders) { + pEncoders->GetFlateModule()->Encode(src_buf, src_size, dest_buf, dest_size); + } +} + +void FlateEncode(const uint8_t* src_buf, + FX_DWORD src_size, + int predictor, + int Colors, + int BitsPerComponent, + int Columns, + uint8_t*& dest_buf, + FX_DWORD& dest_size) { + CCodec_ModuleMgr* pEncoders = CPDF_ModuleMgr::Get()->GetCodecModule(); + if (pEncoders) { + pEncoders->GetFlateModule()->Encode(src_buf, src_size, predictor, Colors, + BitsPerComponent, Columns, dest_buf, + dest_size); + } +} + +FX_DWORD FlateDecode(const uint8_t* src_buf, + FX_DWORD src_size, + uint8_t*& dest_buf, + FX_DWORD& dest_size) { + CCodec_ModuleMgr* pEncoders = CPDF_ModuleMgr::Get()->GetCodecModule(); + if (pEncoders) { + return pEncoders->GetFlateModule()->FlateOrLZWDecode( + FALSE, src_buf, src_size, FALSE, 0, 0, 0, 0, 0, dest_buf, dest_size); + } + return 0; +} diff --git a/core/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp b/core/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp new file mode 100644 index 0000000000..2412f4310c --- /dev/null +++ b/core/fpdfapi/fpdf_parser/fpdf_parser_decode_embeddertest.cpp @@ -0,0 +1,117 @@ +// Copyright 2015 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "core/include/fpdfapi/fpdf_parser_decode.h" + +#include <cstring> +#include <string> + +#include "core/include/fxcrt/fx_basic.h" +#include "testing/embedder_test.h" +#include "testing/fx_string_testhelpers.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" + +class FPDFParserDecodeEmbeddertest : public EmbedderTest {}; + +// NOTE: python's zlib.compress() and zlib.decompress() may be useful for +// external validation of the FlateEncode/FlateDecode test cases. + +TEST_F(FPDFParserDecodeEmbeddertest, FlateEncode) { + pdfium::StrFuncTestData flate_encode_cases[] = { + STR_IN_OUT_CASE("", "\x78\x9c\x03\x00\x00\x00\x00\x01"), + STR_IN_OUT_CASE(" ", "\x78\x9c\x53\x00\x00\x00\x21\x00\x21"), + STR_IN_OUT_CASE("123", "\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97"), + STR_IN_OUT_CASE("\x00\xff", "\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00"), + STR_IN_OUT_CASE( + "1 0 0 -1 29 763 cm\n0 0 555 735 re\nW n\nq\n0 0 555 734.394 re\n" + "W n\nq\n0.8009 0 0 0.8009 0 0 cm\n1 1 1 RG 1 1 1 rg\n/G0 gs\n" + "0 0 693 917 re\nf\nQ\nQ\n", + "\x78\x9c\x33\x54\x30\x00\x42\x5d\x43\x05\x23\x4b\x05\x73\x33\x63" + "\x85\xe4\x5c\x2e\x90\x80\xa9\xa9\xa9\x82\xb9\xb1\xa9\x42\x51\x2a" + "\x57\xb8\x42\x1e\x57\x21\x92\xa0\x89\x9e\xb1\xa5\x09\x92\x84\x9e" + "\x85\x81\x81\x25\xd8\x14\x24\x26\xd0\x18\x43\x05\x10\x0c\x72\x57" + "\x80\x30\x8a\xd2\xb9\xf4\xdd\x0d\x14\xd2\x8b\xc1\x46\x99\x59\x1a" + "\x2b\x58\x1a\x9a\x83\x8c\x49\xe3\x0a\x04\x42\x00\x37\x4c\x1b\x42"), + }; + + for (size_t i = 0; i < FX_ArraySize(flate_encode_cases); ++i) { + const pdfium::StrFuncTestData& data = flate_encode_cases[i]; + unsigned char* result; + unsigned int result_size; + FlateEncode(data.input, data.input_size, result, result_size); + ASSERT_TRUE(result); + EXPECT_EQ(std::string((const char*)data.expected, data.expected_size), + std::string((const char*)result, result_size)) + << " for case " << i; + FX_Free(result); + } +} + +TEST_F(FPDFParserDecodeEmbeddertest, FlateDecode) { + 
pdfium::DecodeTestData flate_decode_cases[] = { + STR_IN_OUT_CASE("", "", 0), + STR_IN_OUT_CASE("preposterous nonsense", "", 2), + STR_IN_OUT_CASE("\x78\x9c\x03\x00\x00\x00\x00\x01", "", 8), + STR_IN_OUT_CASE("\x78\x9c\x53\x00\x00\x00\x21\x00\x21", " ", 9), + STR_IN_OUT_CASE("\x78\x9c\x33\x34\x32\x06\x00\01\x2d\x00\x97", "123", 11), + STR_IN_OUT_CASE("\x78\x9c\x63\xf8\x0f\x00\x01\x01\x01\x00", "\x00\xff", + 10), + STR_IN_OUT_CASE( + "\x78\x9c\x33\x54\x30\x00\x42\x5d\x43\x05\x23\x4b\x05\x73\x33\x63" + "\x85\xe4\x5c\x2e\x90\x80\xa9\xa9\xa9\x82\xb9\xb1\xa9\x42\x51\x2a" + "\x57\xb8\x42\x1e\x57\x21\x92\xa0\x89\x9e\xb1\xa5\x09\x92\x84\x9e" + "\x85\x81\x81\x25\xd8\x14\x24\x26\xd0\x18\x43\x05\x10\x0c\x72\x57" + "\x80\x30\x8a\xd2\xb9\xf4\xdd\x0d\x14\xd2\x8b\xc1\x46\x99\x59\x1a" + "\x2b\x58\x1a\x9a\x83\x8c\x49\xe3\x0a\x04\x42\x00\x37\x4c\x1b\x42", + "1 0 0 -1 29 763 cm\n0 0 555 735 re\nW n\nq\n0 0 555 734.394 re\n" + "W n\nq\n0.8009 0 0 0.8009 0 0 cm\n1 1 1 RG 1 1 1 rg\n/G0 gs\n" + "0 0 693 917 re\nf\nQ\nQ\n", + 96), + }; + + for (size_t i = 0; i < FX_ArraySize(flate_decode_cases); ++i) { + const pdfium::DecodeTestData& data = flate_decode_cases[i]; + unsigned char* result; + unsigned int result_size; + EXPECT_EQ(data.processed_size, + FlateDecode(data.input, data.input_size, result, result_size)) + << " for case " << i; + ASSERT_TRUE(result); + EXPECT_EQ(std::string((const char*)data.expected, data.expected_size), + std::string((const char*)result, result_size)) + << " for case " << i; + FX_Free(result); + } +} + +TEST_F(FPDFParserDecodeEmbeddertest, Bug_552046) { + // Tests specifying multiple image filters for a stream. Should not cause a + // crash when rendered. + EXPECT_TRUE(OpenDocument("bug_552046.pdf")); + FPDF_PAGE page = LoadPage(0); + FPDF_BITMAP bitmap = RenderPage(page); + FPDFBitmap_Destroy(bitmap); + UnloadPage(page); +} + +TEST_F(FPDFParserDecodeEmbeddertest, Bug_555784) { + // Tests bad input to the run length decoder that caused a heap overflow. 
+ // Should not cause a crash when rendered. + EXPECT_TRUE(OpenDocument("bug_555784.pdf")); + FPDF_PAGE page = LoadPage(0); + FPDF_BITMAP bitmap = RenderPage(page); + FPDFBitmap_Destroy(bitmap); + UnloadPage(page); +} + +TEST_F(FPDFParserDecodeEmbeddertest, Bug_455199) { + // Tests object numbers with a value > 01000000. + // Should open successfully. + EXPECT_TRUE(OpenDocument("bug_455199.pdf")); + FPDF_PAGE page = LoadPage(0); + FPDF_BITMAP bitmap = RenderPage(page); + FPDFBitmap_Destroy(bitmap); + UnloadPage(page); +} diff --git a/core/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp b/core/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp new file mode 100644 index 0000000000..e00759da7a --- /dev/null +++ b/core/fpdfapi/fpdf_parser/fpdf_parser_decode_unittest.cpp @@ -0,0 +1,78 @@ +// Copyright 2015 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/include/fpdfapi/fpdf_parser_decode.h" + +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" + +TEST(fpdf_parser_decode, A85Decode) { + pdfium::DecodeTestData test_data[] = { + // Empty src string. + STR_IN_OUT_CASE("", "", 0), + // Empty content in src string. + STR_IN_OUT_CASE("~>", "", 0), + // Regular conversion. + STR_IN_OUT_CASE("FCfN8~>", "test", 7), + // End at the ending mark. + STR_IN_OUT_CASE("FCfN8~>FCfN8", "test", 7), + // Skip whitespaces. + STR_IN_OUT_CASE("\t F C\r\n \tf N 8 ~>", "test", 17), + // No ending mark. + STR_IN_OUT_CASE("@3B0)DJj_BF*)>@Gp#-s", "a funny story :)", 20), + // Non-multiple length. + STR_IN_OUT_CASE("12A", "2k", 3), + // Stop at unknown characters. 
+ STR_IN_OUT_CASE("FCfN8FCfN8vw", "testtest", 11), + }; + for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { + pdfium::DecodeTestData* ptr = &test_data[i]; + uint8_t* result = nullptr; + FX_DWORD result_size; + EXPECT_EQ(ptr->processed_size, + A85Decode(ptr->input, ptr->input_size, result, result_size)) + << "for case " << i; + ASSERT_EQ(ptr->expected_size, result_size); + for (size_t j = 0; j < result_size; ++j) { + EXPECT_EQ(ptr->expected[j], result[j]) << "for case " << i << " char " + << j; + } + FX_Free(result); + } +} + +TEST(fpdf_parser_decode, HexDecode) { + pdfium::DecodeTestData test_data[] = { + // Empty src string. + STR_IN_OUT_CASE("", "", 0), + // Empty content in src string. + STR_IN_OUT_CASE(">", "", 1), + // Only whitespaces in src string. + STR_IN_OUT_CASE("\t \r\n>", "", 7), + // Regular conversion. + STR_IN_OUT_CASE("12Ac>zzz", "\x12\xac", 5), + // Skip whitespaces. + STR_IN_OUT_CASE("12 Ac\t02\r\nBF>zzz>", "\x12\xac\x02\xbf", 13), + // Non-multiple length. + STR_IN_OUT_CASE("12A>zzz", "\x12\xa0", 4), + // Skips unknown characters. + STR_IN_OUT_CASE("12tk \tAc>zzz", "\x12\xac", 10), + // No ending mark. + STR_IN_OUT_CASE("12AcED3c3456", "\x12\xac\xed\x3c\x34\x56", 12), + }; + for (size_t i = 0; i < FX_ArraySize(test_data); ++i) { + pdfium::DecodeTestData* ptr = &test_data[i]; + uint8_t* result = nullptr; + FX_DWORD result_size; + EXPECT_EQ(ptr->processed_size, + HexDecode(ptr->input, ptr->input_size, result, result_size)) + << "for case " << i; + ASSERT_EQ(ptr->expected_size, result_size); + for (size_t j = 0; j < result_size; ++j) { + EXPECT_EQ(ptr->expected[j], result[j]) << "for case " << i << " char " + << j; + } + FX_Free(result); + } +} diff --git a/core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp new file mode 100644 index 0000000000..1f0ab5f876 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp @@ -0,0 +1,228 @@ +// Copyright 2014 PDFium Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" + +#include "core/include/fpdfapi/cpdf_array.h" +#include "core/include/fpdfapi/cpdf_dictionary.h" +#include "core/include/fpdfapi/cpdf_number.h" +#include "core/include/fpdfapi/cpdf_reference.h" +#include "core/include/fpdfapi/cpdf_stream.h" +#include "core/include/fpdfapi/cpdf_string.h" +#include "core/include/fpdfapi/fpdf_parser_decode.h" +#include "core/include/fxcrt/fx_ext.h" + +// Indexed by 8-bit character code, contains either: +// 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff +// 'N' - for numeric: 0123456789+-. +// 'D' - for delimiter: %()/<>[]{} +// 'R' - otherwise. +const char PDF_CharType[256] = { + // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO + // SI + 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W', 'W', 'R', 'W', 'W', 'R', + 'R', + + // DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS + // US + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', + + // SP ! " # $ % & ยด ( ) * + , - . + // / + 'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R', 'D', 'D', 'R', 'N', 'R', 'N', 'N', + 'D', + + // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'R', 'R', 'D', 'R', 'D', + 'R', + + // @ A B C D E F G H I J K L M N O + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', + + // P Q R S T U V W X Y Z [ \ ] ^ _ + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R', + 'R', + + // ` a b c d e f g h i j k l m n o + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', + + // p q r s t u v w x y z { | } ~ + // DEL + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R', + 'R', + + 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'}; + +int32_t GetHeaderOffset(IFX_FileRead* pFile) { + // TODO(dsinclair): This is a complicated way of saying %PDF, simplify? + const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025); + + const size_t kBufSize = 4; + uint8_t buf[kBufSize]; + int32_t offset = 0; + while (offset <= 1024) { + if (!pFile->ReadBlock(buf, offset, kBufSize)) + return -1; + + if (*(FX_DWORD*)buf == tag) + return offset; + + ++offset; + } + return -1; +} + +int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) { + CPDF_Number* pObj = ToNumber(pDict->GetElement(key)); + return pObj ? 
pObj->GetInteger() : 0; +} + +CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { + int size = bstr.GetLength(); + const FX_CHAR* pSrc = bstr.GetCStr(); + if (!FXSYS_memchr(pSrc, '#', size)) { + return bstr; + } + CFX_ByteString result; + FX_CHAR* pDestStart = result.GetBuffer(size); + FX_CHAR* pDest = pDestStart; + for (int i = 0; i < size; i++) { + if (pSrc[i] == '#' && i < size - 2) { + *pDest++ = + FXSYS_toHexDigit(pSrc[i + 1]) * 16 + FXSYS_toHexDigit(pSrc[i + 2]); + i += 2; + } else { + *pDest++ = pSrc[i]; + } + } + result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart)); + return result; +} + +CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig) { + if (!FXSYS_memchr(orig.c_str(), '#', orig.GetLength())) { + return orig; + } + return PDF_NameDecode(CFX_ByteStringC(orig)); +} + +CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) { + uint8_t* src_buf = (uint8_t*)orig.c_str(); + int src_len = orig.GetLength(); + int dest_len = 0; + int i; + for (i = 0; i < src_len; i++) { + uint8_t ch = src_buf[i]; + if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || + PDFCharIsDelimiter(ch)) { + dest_len += 3; + } else { + dest_len++; + } + } + if (dest_len == src_len) + return orig; + + CFX_ByteString res; + FX_CHAR* dest_buf = res.GetBuffer(dest_len); + dest_len = 0; + for (i = 0; i < src_len; i++) { + uint8_t ch = src_buf[i]; + if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || + PDFCharIsDelimiter(ch)) { + dest_buf[dest_len++] = '#'; + dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16]; + dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16]; + } else { + dest_buf[dest_len++] = ch; + } + } + dest_buf[dest_len] = 0; + res.ReleaseBuffer(); + return res; +} + +CFX_ByteTextBuf& operator<<(CFX_ByteTextBuf& buf, const CPDF_Object* pObj) { + if (!pObj) { + buf << " null"; + return buf; + } + switch (pObj->GetType()) { + case CPDF_Object::NULLOBJ: + buf << " null"; + break; + case CPDF_Object::BOOLEAN: + case CPDF_Object::NUMBER: + buf << " " << 
pObj->GetString(); + break; + case CPDF_Object::STRING: + buf << PDF_EncodeString(pObj->GetString(), pObj->AsString()->IsHex()); + break; + case CPDF_Object::NAME: { + CFX_ByteString str = pObj->GetString(); + buf << "/" << PDF_NameEncode(str); + break; + } + case CPDF_Object::REFERENCE: { + buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R "; + break; + } + case CPDF_Object::ARRAY: { + const CPDF_Array* p = pObj->AsArray(); + buf << "["; + for (FX_DWORD i = 0; i < p->GetCount(); i++) { + CPDF_Object* pElement = p->GetElement(i); + if (pElement->GetObjNum()) { + buf << " " << pElement->GetObjNum() << " 0 R"; + } else { + buf << pElement; + } + } + buf << "]"; + break; + } + case CPDF_Object::DICTIONARY: { + const CPDF_Dictionary* p = pObj->AsDictionary(); + buf << "<<"; + for (const auto& it : *p) { + const CFX_ByteString& key = it.first; + CPDF_Object* pValue = it.second; + buf << "/" << PDF_NameEncode(key); + if (pValue && pValue->GetObjNum()) { + buf << " " << pValue->GetObjNum() << " 0 R "; + } else { + buf << pValue; + } + } + buf << ">>"; + break; + } + case CPDF_Object::STREAM: { + const CPDF_Stream* p = pObj->AsStream(); + buf << p->GetDict() << "stream\r\n"; + CPDF_StreamAcc acc; + acc.LoadAllData(p, TRUE); + buf.AppendBlock(acc.GetData(), acc.GetSize()); + buf << "\r\nendstream"; + break; + } + default: + ASSERT(FALSE); + break; + } + return buf; +} diff --git a/core/fpdfapi/fpdf_parser/fpdf_parser_utility.h b/core/fpdfapi/fpdf_parser/fpdf_parser_utility.h new file mode 100644 index 0000000000..d0410eec98 --- /dev/null +++ b/core/fpdfapi/fpdf_parser/fpdf_parser_utility.h @@ -0,0 +1,39 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#ifndef CORE_FPDFAPI_FPDF_PARSER_FPDF_PARSER_UTILITY_H_ +#define CORE_FPDFAPI_FPDF_PARSER_FPDF_PARSER_UTILITY_H_ + +#include "core/include/fxcrt/fx_string.h" +#include "core/include/fxcrt/fx_system.h" + +class IFX_FileRead; +class CPDF_Dictionary; + +// Use the accessors below instead of directly accessing PDF_CharType. +extern const char PDF_CharType[256]; + +inline bool PDFCharIsWhitespace(uint8_t c) { + return PDF_CharType[c] == 'W'; +} +inline bool PDFCharIsNumeric(uint8_t c) { + return PDF_CharType[c] == 'N'; +} +inline bool PDFCharIsDelimiter(uint8_t c) { + return PDF_CharType[c] == 'D'; +} +inline bool PDFCharIsOther(uint8_t c) { + return PDF_CharType[c] == 'R'; +} + +inline bool PDFCharIsLineEnding(uint8_t c) { + return c == '\r' || c == '\n'; +} + +int32_t GetHeaderOffset(IFX_FileRead* pFile); +int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key); + +#endif // CORE_FPDFAPI_FPDF_PARSER_FPDF_PARSER_UTILITY_H_ |