summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: dan sinclair <dsinclair@chromium.org> 2017-04-06 14:44:02 -0400
committer: Chromium commit bot <commit-bot@chromium.org> 2017-04-08 01:38:54 +0000
commit: d9dad3a1915973d113f1f8685474a5a8c1f4faac (patch)
tree: bd34c61ace230963b1c7f92b30cf025cdd16f380
parent: 746c28772e01675096800d8853aae33f94ed3d55 (diff)
download: pdfium-d9dad3a1915973d113f1f8685474a5a8c1f4faac.tar.xz
Add title (/T) extraction for PDF tagged structures [chromium/3067] [chromium/3066]
This CL adds the ability to extract the title from a tagged structure element if one exists.

Bug: pdfium:672
Change-Id: I22e2a8371db4f08b8a70dd77002f1befab97f530
Reviewed-on: https://pdfium-review.googlesource.com/3819
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: dsinclair <dsinclair@chromium.org>
-rw-r--r-- core/fpdfdoc/cpdf_structelement.cpp 3
-rw-r--r-- core/fpdfdoc/cpdf_structelement.h 2
-rw-r--r-- fpdfsdk/fpdf_structtree.cpp 10
-rw-r--r-- public/fpdf_structtree.h 20
-rw-r--r-- samples/pdfium_test.cc 5
5 files changed, 39 insertions, 1 deletions
diff --git a/core/fpdfdoc/cpdf_structelement.cpp b/core/fpdfdoc/cpdf_structelement.cpp
index 137d5b32e4..c85ae0dd42 100644
--- a/core/fpdfdoc/cpdf_structelement.cpp
+++ b/core/fpdfdoc/cpdf_structelement.cpp
@@ -33,7 +33,8 @@ CPDF_StructElement::CPDF_StructElement(CPDF_StructTree* pTree,
: m_pTree(pTree),
m_pParent(pParent),
m_pDict(pDict),
- m_Type(pDict->GetStringFor("S")) {
+ m_Type(pDict->GetStringFor("S")),
+ m_Title(pDict->GetStringFor("T")) {
if (pTree->GetRoleMap()) {
CFX_ByteString mapped = pTree->GetRoleMap()->GetStringFor(m_Type);
if (!mapped.IsEmpty())
diff --git a/core/fpdfdoc/cpdf_structelement.h b/core/fpdfdoc/cpdf_structelement.h
index ba0685e895..c65363db53 100644
--- a/core/fpdfdoc/cpdf_structelement.h
+++ b/core/fpdfdoc/cpdf_structelement.h
@@ -39,6 +39,7 @@ class CPDF_StructElement : public CFX_Retainable {
friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);
const CFX_ByteString& GetType() const { return m_Type; }
+ const CFX_ByteString& GetTitle() const { return m_Title; }
CPDF_Dictionary* GetDict() const { return m_pDict; }
int CountKids() const;
@@ -58,6 +59,7 @@ class CPDF_StructElement : public CFX_Retainable {
CPDF_StructElement* const m_pParent;
CPDF_Dictionary* const m_pDict;
CFX_ByteString m_Type;
+ CFX_ByteString m_Title;
std::vector<CPDF_StructKid> m_Kids;
};
diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp
index 96d40b41c2..74c44f8083 100644
--- a/fpdfsdk/fpdf_structtree.cpp
+++ b/fpdfsdk/fpdf_structtree.cpp
@@ -83,6 +83,16 @@ FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,
: 0;
}
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
+ void* buffer,
+ unsigned long buflen) {
+ CPDF_StructElement* elem = ToStructTreeElement(struct_element);
+ return elem
+ ? WideStringToBuffer(elem->GetTitle().UTF8Decode(), buffer, buflen)
+ : 0;
+}
+
DLLEXPORT int STDCALL
FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) {
CPDF_StructElement* elem = ToStructTreeElement(struct_element);
diff --git a/public/fpdf_structtree.h b/public/fpdf_structtree.h
index 6f85d4222e..9cf46cc306 100644
--- a/public/fpdf_structtree.h
+++ b/public/fpdf_structtree.h
@@ -93,6 +93,26 @@ FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,
void* buffer,
unsigned long buflen);
+// Function: FPDF_StructElement_GetTitle
+// Get the title (/T) for a given element.
+// Parameters:
+// struct_element - Handle to the struct element.
+// buffer - A buffer for output. May be NULL.
+// buflen - The length of the buffer, in bytes. May be 0.
+// Return value:
+// The number of bytes in the title, including the terminating NUL
+// character. The number of bytes is returned regardless of the
+// |buffer| and |buflen| parameters.
+// Comments:
+// Regardless of the platform, the |buffer| is always in UTF-16LE
+// encoding. The string is terminated by a UTF16 NUL character. If
+// |buflen| is less than the required length, or |buffer| is NULL,
+// |buffer| will not be modified.
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
+ void* buffer,
+ unsigned long buflen);
+
// Function: FPDF_StructElement_CountChildren
// Count the number of children for the structure element.
// Parameters:
diff --git a/samples/pdfium_test.cc b/samples/pdfium_test.cc
index 1dc76fee5b..d2b3c01196 100644
--- a/samples/pdfium_test.cc
+++ b/samples/pdfium_test.cc
@@ -641,6 +641,11 @@ void DumpChildStructure(FPDF_STRUCTELEMENT child, int indent) {
printf("%*s%ls", indent * 2, "", ConvertToWString(buf, len).c_str());
memset(buf, 0, sizeof(buf));
+ len = FPDF_StructElement_GetTitle(child, buf, kBufSize);
+ if (len > 0)
+ printf(": '%ls'", ConvertToWString(buf, len).c_str());
+
+ memset(buf, 0, sizeof(buf));
len = FPDF_StructElement_GetAltText(child, buf, kBufSize);
if (len > 0)
printf(" (%ls)", ConvertToWString(buf, len).c_str());