From 29479f6380942515b06240c4933bf3b4cf502013 Mon Sep 17 00:00:00 2001
From: Dan Sinclair
Date: Tue, 4 Apr 2017 10:48:19 -0400
Subject: Add method to get tagged type

This CL adds a FPDF_StructElement_GetType method to get the type of a
given tagged struct. This corresponds to the /S key in the structure
element dictionary.

See PDF1.7 Table 10.10.

Bug: pdfium:672
Change-Id: I313eb886dc9623715995d73a76bce01ba9985e4b
Reviewed-on: https://pdfium-review.googlesource.com/3623
Commit-Queue: dsinclair
Reviewed-by: Lei Zhang
---
 fpdfsdk/fpdf_structtree.cpp              | 40 ++++++++++++++++++++------------
 fpdfsdk/fpdf_structtree_embeddertest.cpp | 29 +++++++++++++++++++++++
 fpdfsdk/fpdfview_c_api_test.c            |  1 +
 public/fpdf_structtree.h                 | 20 ++++++++++++++++
 4 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp
index 06713fba1d..8a93d2299d 100644
--- a/fpdfsdk/fpdf_structtree.cpp
+++ b/fpdfsdk/fpdf_structtree.cpp
@@ -21,6 +21,19 @@ IPDF_StructElement* ToStructTreeElement(FPDF_STRUCTELEMENT struct_element) {
   return reinterpret_cast<IPDF_StructElement*>(struct_element);
 }
 
+unsigned long WideStringToBuffer(const CFX_WideString& str,
+                                 void* buffer,
+                                 unsigned long buflen) {
+  if (str.IsEmpty())
+    return 0;
+
+  CFX_ByteString encodedStr = str.UTF16LE_Encode();
+  const unsigned long len = encodedStr.GetLength();
+  if (buffer && len <= buflen)
+    memcpy(buffer, encodedStr.c_str(), len);
+  return len;
+}
+
 }  // namespace
 
 DLLEXPORT FPDF_STRUCTTREE STDCALL FPDF_StructTree_GetForPage(FPDF_PAGE page) {
@@ -54,22 +67,19 @@ FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,
                               void* buffer,
                               unsigned long buflen) {
   IPDF_StructElement* elem = ToStructTreeElement(struct_element);
-  if (!elem)
-    return 0;
-
-  CPDF_Dictionary* dict = elem->GetDict();
-  if (!dict)
-    return 0;
-
-  CFX_WideString str = elem->GetDict()->GetUnicodeTextFor("Alt");
-  if (str.IsEmpty())
-    return 0;
+  return (elem && elem->GetDict())
+             ? WideStringToBuffer(elem->GetDict()->GetUnicodeTextFor("Alt"),
+                                  buffer, buflen)
+             : 0;
+}
 
-  CFX_ByteString encodedStr = str.UTF16LE_Encode();
-  const unsigned long len = encodedStr.GetLength();
-  if (buffer && len <= buflen)
-    memcpy(buffer, encodedStr.c_str(), len);
-  return len;
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,
+                           void* buffer,
+                           unsigned long buflen) {
+  IPDF_StructElement* elem = ToStructTreeElement(struct_element);
+  return elem ? WideStringToBuffer(elem->GetType().UTF8Decode(), buffer, buflen)
+              : 0;
 }
 
 DLLEXPORT int STDCALL
diff --git a/fpdfsdk/fpdf_structtree_embeddertest.cpp b/fpdfsdk/fpdf_structtree_embeddertest.cpp
index 8ddde5317c..3110988d82 100644
--- a/fpdfsdk/fpdf_structtree_embeddertest.cpp
+++ b/fpdfsdk/fpdf_structtree_embeddertest.cpp
@@ -68,3 +68,32 @@ TEST_F(FPDFStructTreeEmbeddertest, GetAltText) {
   FPDF_StructTree_Close(struct_tree);
   FPDF_ClosePage(page);
 }
+
+TEST_F(FPDFStructTreeEmbeddertest, GetType) {
+  ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  FPDF_STRUCTTREE struct_tree = FPDF_StructTree_GetForPage(page);
+  ASSERT_TRUE(struct_tree);
+  ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree));
+
+  FPDF_STRUCTELEMENT element = FPDF_StructTree_GetChildAtIndex(struct_tree, 0);
+  ASSERT_NE(nullptr, element);
+
+  unsigned short buffer[12];
+  memset(buffer, 0, sizeof(buffer));
+  // Deliberately pass in a small buffer size to make sure |buffer| remains
+  // untouched.
+  ASSERT_EQ(18U, FPDF_StructElement_GetType(element, buffer, 1));
+  for (size_t i = 0; i < FX_ArraySize(buffer); ++i)
+    EXPECT_EQ(0U, buffer[i]);
+
+  ASSERT_EQ(18U, FPDF_StructElement_GetType(element, buffer, sizeof(buffer)));
+  const wchar_t kExpected[] = L"Document";
+  EXPECT_EQ(CFX_WideString(kExpected),
+            CFX_WideString::FromUTF16LE(buffer, FXSYS_len(kExpected)));
+
+  FPDF_StructTree_Close(struct_tree);
+  FPDF_ClosePage(page);
+}
diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c
index fec91efeca..9afbdd479e 100644
--- a/fpdfsdk/fpdfview_c_api_test.c
+++ b/fpdfsdk/fpdfview_c_api_test.c
@@ -178,6 +178,7 @@ int CheckPDFiumCApi() {
     CHK(FPDF_StructTree_CountChildren);
     CHK(FPDF_StructTree_GetChildAtIndex);
     CHK(FPDF_StructElement_GetAltText);
+    CHK(FPDF_StructElement_GetType);
     CHK(FPDF_StructElement_CountChildren);
     CHK(FPDF_StructElement_GetChildAtIndex);
 
diff --git a/public/fpdf_structtree.h b/public/fpdf_structtree.h
index 3d4da402aa..82156365d9 100644
--- a/public/fpdf_structtree.h
+++ b/public/fpdf_structtree.h
@@ -73,6 +73,26 @@ FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,
                               void* buffer,
                               unsigned long buflen);
 
+// Function: FPDF_StructElement_GetType
+//          Get the type (/S) for a given element.
+// Parameters:
+//          struct_element - Handle to the struct element.
+//          buffer - A buffer for output. May be NULL.
+//          buflen - The length of the buffer, in bytes. May be 0.
+// Return value:
+//          The number of bytes in the type, including the terminating NUL
+//          character. The number of bytes is returned regardless of the
+//          |buffer| and |buflen| parameters.
+// Comments:
+//          Regardless of the platform, the |buffer| is always in UTF-16LE
+//          encoding. The string is terminated by a UTF16 NUL character. If
+//          |buflen| is less than the required length, or |buffer| is NULL,
+//          |buffer| will not be modified.
+DLLEXPORT unsigned long STDCALL
+FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,
+                           void* buffer,
+                           unsigned long buflen);
+
 // Function: FPDF_StructElement_CountChildren
 //          Count the number of children for the structure element.
 // Parameters:
-- 
cgit v1.2.3