From 733e068e077bb59597180bc9b8ff934dd125ffdc Mon Sep 17 00:00:00 2001 From: thestig Date: Wed, 23 Nov 2016 05:52:39 -0800 Subject: Add API for getting page labels. BUG=pdfium:479 Review-Url: https://codereview.chromium.org/2521843003 --- core/fpdfdoc/cpdf_pagelabel.cpp | 28 +++++++++----- core/fpdfdoc/cpdf_pagelabel.h | 2 +- fpdfsdk/fpdfdoc.cpp | 77 +++++++++++++++++++++++--------------- fpdfsdk/fpdfdoc_embeddertest.cpp | 54 ++++++++++++++++++++++++++ fpdfsdk/fpdfview_c_api_test.c | 1 + public/fpdf_doc.h | 39 +++++++++++++------ testing/resources/page_labels.pdf | Bin 0 -> 10080 bytes 7 files changed, 149 insertions(+), 52 deletions(-) create mode 100644 testing/resources/page_labels.pdf diff --git a/core/fpdfdoc/cpdf_pagelabel.cpp b/core/fpdfdoc/cpdf_pagelabel.cpp index 64075ef55a..2a79d77ca7 100644 --- a/core/fpdfdoc/cpdf_pagelabel.cpp +++ b/core/fpdfdoc/cpdf_pagelabel.cpp @@ -75,16 +75,21 @@ CFX_WideString GetLabelNumPortion(int num, const CFX_ByteString& bsStyle) { CPDF_PageLabel::CPDF_PageLabel(CPDF_Document* pDocument) : m_pDocument(pDocument) {} -CFX_WideString CPDF_PageLabel::GetLabel(int nPage) const { - CFX_WideString wsLabel; +bool CPDF_PageLabel::GetLabel(int nPage, CFX_WideString* wsLabel) const { if (!m_pDocument) - return wsLabel; + return false; + + if (nPage < 0 || nPage >= m_pDocument->GetPageCount()) + return false; CPDF_Dictionary* pPDFRoot = m_pDocument->GetRoot(); if (!pPDFRoot) - return wsLabel; + return false; CPDF_Dictionary* pLabels = pPDFRoot->GetDictFor("PageLabels"); + if (!pLabels) + return false; + CPDF_NumberTree numberTree(pLabels); CPDF_Object* pValue = nullptr; int n = nPage; @@ -99,18 +104,18 @@ CFX_WideString CPDF_PageLabel::GetLabel(int nPage) const { pValue = pValue->GetDirect(); if (CPDF_Dictionary* pLabel = pValue->AsDictionary()) { if (pLabel->KeyExist("P")) - wsLabel += pLabel->GetUnicodeTextFor("P"); + *wsLabel += pLabel->GetUnicodeTextFor("P"); CFX_ByteString bsNumberingStyle = pLabel->GetStringFor("S", ""); int 
nLabelNum = nPage - n + pLabel->GetIntegerFor("St", 1); CFX_WideString wsNumPortion = GetLabelNumPortion(nLabelNum, bsNumberingStyle); - wsLabel += wsNumPortion; - return wsLabel; + *wsLabel += wsNumPortion; + return true; } } - wsLabel.Format(L"%d", nPage + 1); - return wsLabel; + wsLabel->Format(L"%d", nPage + 1); + return true; } int32_t CPDF_PageLabel::GetPageByLabel(const CFX_ByteStringC& bsLabel) const { @@ -123,7 +128,10 @@ int32_t CPDF_PageLabel::GetPageByLabel(const CFX_ByteStringC& bsLabel) const { int nPages = m_pDocument->GetPageCount(); for (int i = 0; i < nPages; i++) { - if (PDF_EncodeText(GetLabel(i)).Compare(bsLabel)) + CFX_WideString str; + if (!GetLabel(i, &str)) + continue; + if (PDF_EncodeText(str).Compare(bsLabel)) return i; } diff --git a/core/fpdfdoc/cpdf_pagelabel.h b/core/fpdfdoc/cpdf_pagelabel.h index 6a0664bfca..0f91f614d9 100644 --- a/core/fpdfdoc/cpdf_pagelabel.h +++ b/core/fpdfdoc/cpdf_pagelabel.h @@ -15,7 +15,7 @@ class CPDF_PageLabel { public: explicit CPDF_PageLabel(CPDF_Document* pDocument); - CFX_WideString GetLabel(int nPage) const; + bool GetLabel(int nPage, CFX_WideString* wsLabel) const; int32_t GetPageByLabel(const CFX_ByteStringC& bsLabel) const; int32_t GetPageByLabel(const CFX_WideStringC& wsLabel) const; diff --git a/fpdfsdk/fpdfdoc.cpp b/fpdfsdk/fpdfdoc.cpp index 01d91242f4..254be3f883 100644 --- a/fpdfsdk/fpdfdoc.cpp +++ b/fpdfsdk/fpdfdoc.cpp @@ -15,6 +15,7 @@ #include "core/fpdfdoc/cpdf_bookmark.h" #include "core/fpdfdoc/cpdf_bookmarktree.h" #include "core/fpdfdoc/cpdf_dest.h" +#include "core/fpdfdoc/cpdf_pagelabel.h" #include "fpdfsdk/fsdk_define.h" #include "third_party/base/stl_util.h" @@ -58,6 +59,16 @@ CPDF_LinkList* GetLinkList(CPDF_Page* page) { return pHolder->get(); } +unsigned long Utf16EncodeMaybeCopyAndReturnLength(const CFX_WideString& text, + void* buffer, + unsigned long buflen) { + CFX_ByteString encodedText = text.UTF16LE_Encode(); + unsigned long len = encodedText.GetLength(); + if (buffer && buflen 
>= len) + FXSYS_memcpy(buffer, encodedText.c_str(), len); + return len; +} + } // namespace DLLEXPORT FPDF_BOOKMARK STDCALL @@ -91,12 +102,7 @@ DLLEXPORT unsigned long STDCALL FPDFBookmark_GetTitle(FPDF_BOOKMARK pDict, return 0; CPDF_Bookmark bookmark(ToDictionary(static_cast(pDict))); CFX_WideString title = bookmark.GetTitle(); - CFX_ByteString encodedTitle = title.UTF16LE_Encode(); - unsigned long len = encodedTitle.GetLength(); - if (buffer && buflen >= len) { - FXSYS_memcpy(buffer, encodedTitle.c_str(), len); - } - return len; + return Utf16EncodeMaybeCopyAndReturnLength(title, buffer, buflen); } DLLEXPORT FPDF_BOOKMARK STDCALL FPDFBookmark_Find(FPDF_DOCUMENT document, @@ -354,42 +360,53 @@ DLLEXPORT FPDF_BOOL STDCALL FPDFLink_GetQuadPoints(FPDF_LINK linkAnnot, CPDF_Dictionary* pAnnotDict = ToDictionary(static_cast(linkAnnot)); CPDF_Array* pArray = pAnnotDict->GetArrayFor("QuadPoints"); - if (pArray) { - if (quadIndex < 0 || - static_cast(quadIndex) >= pArray->GetCount() / 8 || - (static_cast(quadIndex * 8 + 7) >= pArray->GetCount())) - return false; - quadPoints->x1 = pArray->GetNumberAt(quadIndex * 8); - quadPoints->y1 = pArray->GetNumberAt(quadIndex * 8 + 1); - quadPoints->x2 = pArray->GetNumberAt(quadIndex * 8 + 2); - quadPoints->y2 = pArray->GetNumberAt(quadIndex * 8 + 3); - quadPoints->x3 = pArray->GetNumberAt(quadIndex * 8 + 4); - quadPoints->y3 = pArray->GetNumberAt(quadIndex * 8 + 5); - quadPoints->x4 = pArray->GetNumberAt(quadIndex * 8 + 6); - quadPoints->y4 = pArray->GetNumberAt(quadIndex * 8 + 7); - return true; + if (!pArray) + return false; + + if (quadIndex < 0 || + static_cast(quadIndex) >= pArray->GetCount() / 8 || + (static_cast(quadIndex * 8 + 7) >= pArray->GetCount())) { + return false; } - return false; + + quadPoints->x1 = pArray->GetNumberAt(quadIndex * 8); + quadPoints->y1 = pArray->GetNumberAt(quadIndex * 8 + 1); + quadPoints->x2 = pArray->GetNumberAt(quadIndex * 8 + 2); + quadPoints->y2 = pArray->GetNumberAt(quadIndex * 8 + 3); + 
quadPoints->x3 = pArray->GetNumberAt(quadIndex * 8 + 4); + quadPoints->y3 = pArray->GetNumberAt(quadIndex * 8 + 5); + quadPoints->x4 = pArray->GetNumberAt(quadIndex * 8 + 6); + quadPoints->y4 = pArray->GetNumberAt(quadIndex * 8 + 7); + return true; } -DLLEXPORT unsigned long STDCALL FPDF_GetMetaText(FPDF_DOCUMENT doc, +DLLEXPORT unsigned long STDCALL FPDF_GetMetaText(FPDF_DOCUMENT document, FPDF_BYTESTRING tag, void* buffer, unsigned long buflen) { if (!tag) return 0; - CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(doc); + CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); if (!pDoc) return 0; CPDF_Dictionary* pInfo = pDoc->GetInfo(); if (!pInfo) return 0; CFX_WideString text = pInfo->GetUnicodeTextFor(tag); - // Use UTF-16LE encoding - CFX_ByteString encodedText = text.UTF16LE_Encode(); - unsigned long len = encodedText.GetLength(); - if (buffer && buflen >= len) { - FXSYS_memcpy(buffer, encodedText.c_str(), len); - } - return len; + return Utf16EncodeMaybeCopyAndReturnLength(text, buffer, buflen); +} + +DLLEXPORT unsigned long STDCALL FPDF_GetPagelLabel(FPDF_DOCUMENT document, + int page_index, + void* buffer, + unsigned long buflen) { + if (page_index < 0) + return 0; + + // CPDF_PageLabel can deal with NULL |document|. 
+ CPDF_PageLabel label(CPDFDocumentFromFPDFDocument(document)); + CFX_WideString str; + if (!label.GetLabel(page_index, &str)) + return 0; + return Utf16EncodeMaybeCopyAndReturnLength(str, buffer, buflen); } diff --git a/fpdfsdk/fpdfdoc_embeddertest.cpp b/fpdfsdk/fpdfdoc_embeddertest.cpp index d7f1f97117..67934c5626 100644 --- a/fpdfsdk/fpdfdoc_embeddertest.cpp +++ b/fpdfsdk/fpdfdoc_embeddertest.cpp @@ -172,3 +172,57 @@ TEST_F(FPDFDocEmbeddertest, DeletePage) { FPDFPage_Delete(document(), 0); EXPECT_EQ(0, FPDF_GetPageCount(document())); } + +TEST_F(FPDFDocEmbeddertest, NoPageLabels) { + EXPECT_TRUE(OpenDocument("about_blank.pdf")); + EXPECT_EQ(1, FPDF_GetPageCount(document())); + + ASSERT_EQ(0u, FPDF_GetPagelLabel(document(), 0, nullptr, 0)); +} + +TEST_F(FPDFDocEmbeddertest, GetPageLabels) { + EXPECT_TRUE(OpenDocument("page_labels.pdf")); + EXPECT_EQ(7, FPDF_GetPageCount(document())); + + unsigned short buf[128]; + EXPECT_EQ(0u, FPDF_GetPagelLabel(document(), -2, buf, sizeof(buf))); + EXPECT_EQ(0u, FPDF_GetPagelLabel(document(), -1, buf, sizeof(buf))); + + const FX_WCHAR kExpectedPageLabel0[] = L"i"; + ASSERT_EQ(4u, FPDF_GetPagelLabel(document(), 0, buf, sizeof(buf))); + EXPECT_EQ(CFX_WideString(kExpectedPageLabel0), + CFX_WideString::FromUTF16LE(buf, FXSYS_len(kExpectedPageLabel0))); + + const FX_WCHAR kExpectedPageLabel1[] = L"ii"; + ASSERT_EQ(6u, FPDF_GetPagelLabel(document(), 1, buf, sizeof(buf))); + EXPECT_EQ(CFX_WideString(kExpectedPageLabel1), + CFX_WideString::FromUTF16LE(buf, FXSYS_len(kExpectedPageLabel1))); + + const FX_WCHAR kExpectedPageLabel2[] = L"1"; + ASSERT_EQ(4u, FPDF_GetPagelLabel(document(), 2, buf, sizeof(buf))); + EXPECT_EQ(CFX_WideString(kExpectedPageLabel2), + CFX_WideString::FromUTF16LE(buf, FXSYS_len(kExpectedPageLabel2))); + + const FX_WCHAR kExpectedPageLabel3[] = L"2"; + ASSERT_EQ(4u, FPDF_GetPagelLabel(document(), 3, buf, sizeof(buf))); + EXPECT_EQ(CFX_WideString(kExpectedPageLabel3), + CFX_WideString::FromUTF16LE(buf, 
FXSYS_len(kExpectedPageLabel3))); + + const FX_WCHAR kExpectedPageLabel4[] = L"zzA"; + ASSERT_EQ(8u, FPDF_GetPagelLabel(document(), 4, buf, sizeof(buf))); + EXPECT_EQ(CFX_WideString(kExpectedPageLabel4), + CFX_WideString::FromUTF16LE(buf, FXSYS_len(kExpectedPageLabel4))); + + const FX_WCHAR kExpectedPageLabel5[] = L"zzB"; + ASSERT_EQ(8u, FPDF_GetPagelLabel(document(), 5, buf, sizeof(buf))); + EXPECT_EQ(CFX_WideString(kExpectedPageLabel5), + CFX_WideString::FromUTF16LE(buf, FXSYS_len(kExpectedPageLabel5))); + + const FX_WCHAR kExpectedPageLabel6[] = L""; + ASSERT_EQ(2u, FPDF_GetPagelLabel(document(), 6, buf, sizeof(buf))); + EXPECT_EQ(CFX_WideString(kExpectedPageLabel6), + CFX_WideString::FromUTF16LE(buf, FXSYS_len(kExpectedPageLabel6))); + + ASSERT_EQ(0u, FPDF_GetPagelLabel(document(), 7, buf, sizeof(buf))); + ASSERT_EQ(0u, FPDF_GetPagelLabel(document(), 8, buf, sizeof(buf))); +} diff --git a/fpdfsdk/fpdfview_c_api_test.c b/fpdfsdk/fpdfview_c_api_test.c index afc357f1e3..5e6c36f2b1 100644 --- a/fpdfsdk/fpdfview_c_api_test.c +++ b/fpdfsdk/fpdfview_c_api_test.c @@ -64,6 +64,7 @@ int CheckPDFiumCApi() { CHK(FPDFLink_CountQuadPoints); CHK(FPDFLink_GetQuadPoints); CHK(FPDF_GetMetaText); + CHK(FPDF_GetPagelLabel); // fpdf_edit.h CHK(FPDF_CreateNewDocument); diff --git a/public/fpdf_doc.h b/public/fpdf_doc.h index 9d2229c0c1..b245d46900 100644 --- a/public/fpdf_doc.h +++ b/public/fpdf_doc.h @@ -281,26 +281,43 @@ DLLEXPORT FPDF_BOOL STDCALL FPDFLink_GetQuadPoints(FPDF_LINK linkAnnot, // Get meta-data |tag| content from |document|. // -// doc - handle to the document -// tag - the tag to retrieve. The tag can be one of: -// Title, Author, Subject, Keywords, Creator, Producer, -// CreationDate, or ModDate. -// For detailed explanations of these tags and their respective -// values, please refer to PDF Reference 1.6, section 10.2.1, -// 'Document Information Dictionary'. -// buffer - a buffer for the title. May be NULL. -// buflen - the length of the buffer, in bytes. 
May be 0. +// document - handle to the document. +// tag - the tag to retrieve. The tag can be one of: +// Title, Author, Subject, Keywords, Creator, Producer, +// CreationDate, or ModDate. +// For detailed explanations of these tags and their respective +// values, please refer to PDF Reference 1.6, section 10.2.1, +// 'Document Information Dictionary'. +// buffer - a buffer for the value of the tag. May be NULL. +// buflen - the length of the buffer, in bytes. May be 0. // -// Returns the number of bytes in the title, including trailing zeros. +// Returns the number of bytes in the tag's value, including trailing zeros. // // The |buffer| is always encoded in UTF-16LE. The |buffer| is followed by two // bytes of zeros indicating the end of the string. If |buflen| is less than // the returned length, or |buffer| is NULL, |buffer| will not be modified. -DLLEXPORT unsigned long STDCALL FPDF_GetMetaText(FPDF_DOCUMENT doc, +DLLEXPORT unsigned long STDCALL FPDF_GetMetaText(FPDF_DOCUMENT document, FPDF_BYTESTRING tag, void* buffer, unsigned long buflen); +// Get the page label for |page_index| from |document|. +// +// document - handle to the document. +// page_index - the 0-based index of the page. +// buffer - a buffer for the page label. May be NULL. +// buflen - the length of the buffer, in bytes. May be 0. +// +// Returns the number of bytes in the page label, including trailing zeros. +// +// The |buffer| is always encoded in UTF-16LE. The |buffer| is followed by two +// bytes of zeros indicating the end of the string. If |buflen| is less than +// the returned length, or |buffer| is NULL, |buffer| will not be modified. 
+DLLEXPORT unsigned long STDCALL FPDF_GetPagelLabel(FPDF_DOCUMENT document, + int page_index, + void* buffer, + unsigned long buflen); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/testing/resources/page_labels.pdf b/testing/resources/page_labels.pdf new file mode 100644 index 0000000000..fd1a1e55e9 Binary files /dev/null and b/testing/resources/page_labels.pdf differ -- cgit v1.2.3