diff options
author | Henrique Nakashima <hnakashima@chromium.org> | 2017-06-19 16:04:34 -0400 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-06-19 20:44:58 +0000 |
commit | b73ce7bcd72a97c780176d9f63681a5415c06422 (patch) | |
tree | 2e0b3b4f2996344101b9af67ce761708255b05ef | |
parent | d3610056e36b9211e9ed3806a3f99a1184e9413e (diff) | |
download | pdfium-chromium/3136.tar.xz |
Fixing metadata not read from linearized file. [chromium/3136]
If the info dict is not on the first page, this still won't work unless
FPDFAvail_IsFormAvail or FPDFAvail_IsPageAvail is called first,
as these are the methods that trigger parsing the rest of the data.
Bug: pdfium:664
Change-Id: I0b0193e415a1153dcfb8bfba0e0482da6b6ba53c
Reviewed-on: https://pdfium-review.googlesource.com/6610
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
Reviewed-by: Nicolás Peña <npm@chromium.org>
-rw-r--r-- | core/fpdfapi/parser/cpdf_document.cpp | 4 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_document.h | 1 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser.cpp | 19 | ||||
-rw-r--r-- | fpdfsdk/fpdfdoc.cpp | 1 | ||||
-rw-r--r-- | samples/pdfium_test.cc | 17 |
5 files changed, 40 insertions, 2 deletions
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp index 1fdd59e615..9bfb16b97e 100644 --- a/core/fpdfapi/parser/cpdf_document.cpp +++ b/core/fpdfapi/parser/cpdf_document.cpp @@ -374,6 +374,10 @@ void CPDF_Document::LoadDocInternal() { if (!m_pRootDict) return; + LoadDocumentInfo(); +} + +void CPDF_Document::LoadDocumentInfo() { CPDF_Object* pInfoObj = GetOrParseIndirectObject(m_pParser->GetInfoObjNum()); if (pInfoObj) m_pInfoDict = pInfoObj->GetDict(); diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h index 493c1edb32..e6107e1040 100644 --- a/core/fpdfapi/parser/cpdf_document.h +++ b/core/fpdfapi/parser/cpdf_document.h @@ -87,6 +87,7 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { void LoadDoc(); void LoadLinearizedDoc(const CPDF_LinearizedHeader* pLinearizationParams); void LoadPages(); + void LoadDocumentInfo(); void CreateNewDoc(); CPDF_Dictionary* CreateNewPage(int iPage); diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index 01c3b8ccd1..b2bde8d242 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -360,6 +360,9 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) return false; + if (m_pTrailer) + m_Trailers.push_back(std::move(m_pTrailer)); + m_pTrailer = LoadTrailerV4(); if (!m_pTrailer) return false; @@ -1084,7 +1087,18 @@ uint32_t CPDF_Parser::GetRootObjNum() { uint32_t CPDF_Parser::GetInfoObjNum() { CPDF_Reference* pRef = ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Info") : nullptr); - return pRef ? pRef->GetRefObjNum() : 0; + if (pRef) + return pRef->GetRefObjNum(); + + // Search trailers array from latest to earliest revision, as we want the + // most recent Info object number. + // See PDF 1.7 spec, section 3.4.5 - Incremental Updates. 
+ for (auto it = m_Trailers.rbegin(); it != m_Trailers.rend(); ++it) { + pRef = ToReference(it->get()->GetObjectFor("Info")); + if (pRef) + return pRef->GetRefObjNum(); + } + return 0; } std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject( @@ -1545,7 +1559,8 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { uint32_t dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum; m_pSyntax->m_MetadataObjnum = 0; - m_pTrailer.reset(); + if (m_pTrailer) + m_Trailers.push_back(std::move(m_pTrailer)); m_pSyntax->SetPos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset); uint8_t ch = 0; diff --git a/fpdfsdk/fpdfdoc.cpp b/fpdfsdk/fpdfdoc.cpp index 1c445e6f14..7be53a6554 100644 --- a/fpdfsdk/fpdfdoc.cpp +++ b/fpdfsdk/fpdfdoc.cpp @@ -394,6 +394,7 @@ DLLEXPORT unsigned long STDCALL FPDF_GetMetaText(FPDF_DOCUMENT document, CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); if (!pDoc) return 0; + pDoc->LoadDocumentInfo(); CPDF_Dictionary* pInfo = pDoc->GetInfo(); if (!pInfo) return 0; diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc index 86f6f89d24..14aa6c187d 100644 --- a/samples/pdfium_test.cc +++ b/samples/pdfium_test.cc @@ -74,12 +74,14 @@ enum OutputFormat { struct Options { Options() : show_config(false), + show_metadata(false), send_events(false), pages(false), md5(false), output_format(OUTPUT_NONE) {} bool show_config; + bool show_metadata; bool send_events; bool pages; bool md5; @@ -625,6 +627,8 @@ bool ParseCommandLine(const std::vector<std::string>& args, const std::string& cur_arg = args[cur_idx]; if (cur_arg == "--show-config") { options->show_config = true; + } else if (cur_arg == "--show-metadata") { + options->show_metadata = true; } else if (cur_arg == "--send-events") { options->send_events = true; } else if (cur_arg == "--ppm") { @@ -1108,6 +1112,19 @@ void RenderPdf(const std::string& name, (void)FPDF_GetDocPermissions(doc.get()); + if 
(options.show_metadata) { + const char* metaTags[] = {"Title", "Author", "Subject", "Keywords", + "Creator", "Producer", "CreationDate", "ModDate"}; + for (const char* metaTag : metaTags) { + char metaBuffer[4096]; + int len = FPDF_GetMetaText(doc.get(), metaTag, metaBuffer, 4096); + printf("%-12s = %ls (%d bytes)\n", metaTag, + GetPlatformWString(reinterpret_cast<unsigned short*>(metaBuffer)) + .c_str(), + len); + } + } + std::unique_ptr<void, FPDFFormHandleDeleter> form( FPDFDOC_InitFormFillEnvironment(doc.get(), &form_callbacks)); form_callbacks.form_handle = form.get(); |