diff options
author | dan sinclair <dsinclair@chromium.org> | 2017-04-06 14:44:02 -0400 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-04-08 01:38:54 +0000 |
commit | d9dad3a1915973d113f1f8685474a5a8c1f4faac (patch) | |
tree | bd34c61ace230963b1c7f92b30cf025cdd16f380 | |
parent | 746c28772e01675096800d8853aae33f94ed3d55 (diff) | |
download | pdfium-d9dad3a1915973d113f1f8685474a5a8c1f4faac.tar.xz |
Add title (/T) extraction for PDF tagged structures [chromium/3067] [chromium/3066]
This CL adds the ability to extract the title from a tagged structure element if
one exists.
Bug: pdfium:672
Change-Id: I22e2a8371db4f08b8a70dd77002f1befab97f530
Reviewed-on: https://pdfium-review.googlesource.com/3819
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: dsinclair <dsinclair@chromium.org>
-rw-r--r-- | core/fpdfdoc/cpdf_structelement.cpp | 3 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_structelement.h | 2 | ||||
-rw-r--r-- | fpdfsdk/fpdf_structtree.cpp | 10 | ||||
-rw-r--r-- | public/fpdf_structtree.h | 20 | ||||
-rw-r--r-- | samples/pdfium_test.cc | 5 |
5 files changed, 39 insertions, 1 deletions
diff --git a/core/fpdfdoc/cpdf_structelement.cpp b/core/fpdfdoc/cpdf_structelement.cpp
index 137d5b32e4..c85ae0dd42 100644
--- a/core/fpdfdoc/cpdf_structelement.cpp
+++ b/core/fpdfdoc/cpdf_structelement.cpp
@@ -33,7 +33,8 @@ CPDF_StructElement::CPDF_StructElement(CPDF_StructTree* pTree,
     : m_pTree(pTree),
       m_pParent(pParent),
       m_pDict(pDict),
-      m_Type(pDict->GetStringFor("S")) {
+      m_Type(pDict->GetStringFor("S")),
+      m_Title(pDict->GetStringFor("T")) {
   if (pTree->GetRoleMap()) {
     CFX_ByteString mapped = pTree->GetRoleMap()->GetStringFor(m_Type);
     if (!mapped.IsEmpty())
diff --git a/core/fpdfdoc/cpdf_structelement.h b/core/fpdfdoc/cpdf_structelement.h
index ba0685e895..c65363db53 100644
--- a/core/fpdfdoc/cpdf_structelement.h
+++ b/core/fpdfdoc/cpdf_structelement.h
@@ -39,6 +39,7 @@ class CPDF_StructElement : public CFX_Retainable {
   friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);
 
   const CFX_ByteString& GetType() const { return m_Type; }
+  const CFX_ByteString& GetTitle() const { return m_Title; }
   CPDF_Dictionary* GetDict() const { return m_pDict; }
 
   int CountKids() const;
@@ -58,6 +59,7 @@ class CPDF_StructElement : public CFX_Retainable {
   CPDF_StructElement* const m_pParent;
   CPDF_Dictionary* const m_pDict;
   CFX_ByteString m_Type;
+  CFX_ByteString m_Title;
   std::vector<CPDF_StructKid> m_Kids;
 };
 
diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp
index 96d40b41c2..74c44f8083 100644
--- a/fpdfsdk/fpdf_structtree.cpp
+++ b/fpdfsdk/fpdf_structtree.cpp
@@ -83,6 +83,16 @@ FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,
              : 0;
 }
 
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
+                            void* buffer,
+                            unsigned long buflen) {
+  CPDF_StructElement* elem = ToStructTreeElement(struct_element);
+  return elem
+             ? WideStringToBuffer(elem->GetTitle().UTF8Decode(), buffer, buflen)
+             : 0;
+}
+
 DLLEXPORT int STDCALL
 FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) {
   CPDF_StructElement* elem = ToStructTreeElement(struct_element);
diff --git a/public/fpdf_structtree.h b/public/fpdf_structtree.h
index 6f85d4222e..9cf46cc306 100644
--- a/public/fpdf_structtree.h
+++ b/public/fpdf_structtree.h
@@ -93,6 +93,26 @@ FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,
                            void* buffer,
                            unsigned long buflen);
 
+// Function: FPDF_StructElement_GetTitle
+//          Get the title (/T) for a given element.
+// Parameters:
+//          struct_element - Handle to the struct element.
+//          buffer         - A buffer for output. May be NULL.
+//          buflen         - The length of the buffer, in bytes. May be 0.
+// Return value:
+//          The number of bytes in the title, including the terminating NUL
+//          character. The number of bytes is returned regardless of the
+//          |buffer| and |buflen| parameters.
+// Comments:
+//          Regardless of the platform, the |buffer| is always in UTF-16LE
+//          encoding. The string is terminated by a UTF16 NUL character. If
+//          |buflen| is less than the required length, or |buffer| is NULL,
+//          |buffer| will not be modified.
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
+                            void* buffer,
+                            unsigned long buflen);
+
 // Function: FPDF_StructElement_CountChildren
 //          Count the number of children for the structure element.
 // Parameters:
diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc
index 1dc76fee5b..d2b3c01196 100644
--- a/samples/pdfium_test.cc
+++ b/samples/pdfium_test.cc
@@ -641,6 +641,11 @@ void DumpChildStructure(FPDF_STRUCTELEMENT child, int indent) {
   printf("%*s%ls", indent * 2, "", ConvertToWString(buf, len).c_str());
 
   memset(buf, 0, sizeof(buf));
+  len = FPDF_StructElement_GetTitle(child, buf, kBufSize);
+  if (len > 0)
+    printf(": '%ls'", ConvertToWString(buf, len).c_str());
+
+  memset(buf, 0, sizeof(buf));
   len = FPDF_StructElement_GetAltText(child, buf, kBufSize);
   if (len > 0)
     printf(" (%ls)", ConvertToWString(buf, len).c_str());