From 1b2b76d634ba3716ee43a553cd952927a7d262a2 Mon Sep 17 00:00:00 2001
From: Nicolas Pena
Date: Fri, 17 Aug 2018 19:54:23 +0000
Subject: Add FPDFText_GetFontInfo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug: pdfium:929
Change-Id: I9da03a1e317cff69ec4c76b69289cfa753b6bb77
Reviewed-on: https://pdfium-review.googlesource.com/40531
Reviewed-by: Lei Zhang
Reviewed-by: Henrique Nakashima
Commit-Queue: Nicolás Peña Moreno
---
 core/fpdfapi/font/cpdf_font.h      |  1 +
 fpdfsdk/fpdf_text.cpp              | 33 +++++++++++++++
 fpdfsdk/fpdf_text_embeddertest.cpp | 83 ++++++++++++++++++++++++++++++++++++++
 fpdfsdk/fpdf_view_c_api_test.c     |  1 +
 public/fpdf_text.h                 | 25 ++++++++++++
 5 files changed, 143 insertions(+)

diff --git a/core/fpdfapi/font/cpdf_font.h b/core/fpdfapi/font/cpdf_font.h
index 53e5b7b697..3a91926e19 100644
--- a/core/fpdfapi/font/cpdf_font.h
+++ b/core/fpdfapi/font/cpdf_font.h
@@ -80,6 +80,7 @@ class CPDF_Font {
   uint32_t GetStringWidth(const ByteStringView& pString);
   uint32_t FallbackFontFromCharcode(uint32_t charcode);
   int FallbackGlyphFromCharcode(int fallbackFont, uint32_t charcode);
+  int GetFontFlags() const { return m_Flags; }
 
   virtual uint32_t GetCharWidthF(uint32_t charcode) = 0;
   virtual FX_RECT GetCharBBox(uint32_t charcode) = 0;
diff --git a/fpdfsdk/fpdf_text.cpp b/fpdfsdk/fpdf_text.cpp
index d8d3b2675b..a39402eb72 100644
--- a/fpdfsdk/fpdf_text.cpp
+++ b/fpdfsdk/fpdf_text.cpp
@@ -10,7 +10,9 @@
 #include
 #include
 
+#include "core/fpdfapi/font/cpdf_font.h"
 #include "core/fpdfapi/page/cpdf_page.h"
+#include "core/fpdfapi/page/cpdf_textobject.h"
 #include "core/fpdfdoc/cpdf_viewerpreferences.h"
 #include "core/fpdftext/cpdf_linkextract.h"
 #include "core/fpdftext/cpdf_textpage.h"
@@ -88,6 +90,37 @@ FPDF_EXPORT double FPDF_CALLCONV FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
   return charinfo.m_FontSize;
 }
 
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDFText_GetFontInfo(FPDF_TEXTPAGE text_page,
+                     int index,
+                     void* buffer,
+                     unsigned long buflen,
+                     int* flags) {
+  if (!text_page)
+    return 0;
+  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
+
+  if (index < 0 || index >= textpage->CountChars())
+    return 0;
+
+  FPDF_CHAR_INFO charinfo;
+  textpage->GetCharInfo(index, &charinfo);
+  if (!charinfo.m_pTextObj)
+    return 0;
+
+  CPDF_Font* font = charinfo.m_pTextObj->GetFont();
+  if (!font)
+    return 0;
+
+  if (flags)
+    *flags = font->GetFontFlags();
+  ByteString basefont = font->GetBaseFont();
+  unsigned long length = basefont.GetLength() + 1;
+  if (buffer && buflen >= length)
+    memcpy(buffer, basefont.c_str(), length);
+  return length;
+}
+
 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
                                                         int index,
                                                         double* left,
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index bf064d672a..83b43d95d0 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -8,6 +8,7 @@
 #include
 
 #include "core/fxcrt/fx_memory.h"
+#include "core/fxge/fx_font.h"
 #include "public/cpp/fpdf_scopers.h"
 #include "public/fpdf_text.h"
 #include "public/fpdf_transformpage.h"
@@ -512,6 +513,88 @@ TEST_F(FPDFTextEmbeddertest, GetFontSize) {
   UnloadPage(page);
 }
 
+TEST_F(FPDFTextEmbeddertest, GetFontInfo) {
+  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+  ASSERT_TRUE(textpage);
+  std::vector<char> font_name;
+  size_t num_chars1 = strlen("Hello, world!");
+  const char kExpectedFontName1[] = "Times-Roman";
+
+  for (size_t i = 0; i < num_chars1; i++) {
+    int flags = -1;
+    unsigned long length =
+        FPDFText_GetFontInfo(textpage, i, nullptr, 0, &flags);
+    static constexpr unsigned long expected_length = sizeof(kExpectedFontName1);
+    ASSERT_EQ(expected_length, length);
+    EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
+    font_name.resize(length);
+    std::fill(font_name.begin(), font_name.end(), 'a');
+    flags = -1;
+    EXPECT_EQ(expected_length,
+              FPDFText_GetFontInfo(textpage, i, font_name.data(),
+                                   font_name.size(), &flags));
+    EXPECT_STREQ(kExpectedFontName1, font_name.data());
+    EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
+  }
+  // If the size of the buffer is not large enough, the buffer should remain
+  // unchanged.
+  font_name.pop_back();
+  std::fill(font_name.begin(), font_name.end(), 'a');
+  EXPECT_EQ(sizeof(kExpectedFontName1),
+            FPDFText_GetFontInfo(textpage, 0, font_name.data(),
+                                 font_name.size(), nullptr));
+  for (char a : font_name)
+    EXPECT_EQ('a', a);
+
+  // The text is "Hello, world!\r\nGoodbye, world!", so the next two characters
+  // do not have any font information.
+  EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, num_chars1, font_name.data(),
+                                     font_name.size(), nullptr));
+  EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, num_chars1 + 1, font_name.data(),
+                                     font_name.size(), nullptr));
+
+  size_t num_chars2 = strlen("Goodbye, world!");
+  const char kExpectedFontName2[] = "Helvetica";
+  for (size_t i = num_chars1 + 2; i < num_chars1 + num_chars2 + 2; i++) {
+    int flags = -1;
+    unsigned long length =
+        FPDFText_GetFontInfo(textpage, i, nullptr, 0, &flags);
+    static constexpr unsigned long expected_length = sizeof(kExpectedFontName2);
+    ASSERT_EQ(expected_length, length);
+    EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
+    font_name.resize(length);
+    std::fill(font_name.begin(), font_name.end(), 'a');
+    flags = -1;
+    EXPECT_EQ(expected_length,
+              FPDFText_GetFontInfo(textpage, i, font_name.data(),
+                                   font_name.size(), &flags));
+    EXPECT_STREQ(kExpectedFontName2, font_name.data());
+    EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
+  }
+
+  // Now try some out of bounds indices and null pointers to make sure we do not
+  // crash.
+  // No textpage.
+  EXPECT_EQ(0u, FPDFText_GetFontInfo(nullptr, 0, font_name.data(),
+                                     font_name.size(), nullptr));
+  // No buffer.
+  EXPECT_EQ(sizeof(kExpectedFontName1),
+            FPDFText_GetFontInfo(textpage, 0, nullptr, 0, nullptr));
+  // Negative index.
+  EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, -1, font_name.data(),
+                                     font_name.size(), nullptr));
+  // Out of bounds index.
+  EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, 1000, font_name.data(),
+                                     font_name.size(), nullptr));
+
+  FPDFText_ClosePage(textpage);
+  UnloadPage(page);
+}
+
 TEST_F(FPDFTextEmbeddertest, ToUnicode) {
   ASSERT_TRUE(OpenDocument("bug_583.pdf"));
   FPDF_PAGE page = LoadPage(0);
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index d92ae78d65..799956b36a 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -324,6 +324,7 @@ int CheckPDFiumCApi() {
     CHK(FPDFText_GetCharBox);
     CHK(FPDFText_GetCharIndexAtPos);
     CHK(FPDFText_GetCharOrigin);
+    CHK(FPDFText_GetFontInfo);
     CHK(FPDFText_GetFontSize);
     CHK(FPDFText_GetRect);
     CHK(FPDFText_GetSchCount);
diff --git a/public/fpdf_text.h b/public/fpdf_text.h
index 3502337443..feb54fb5ab 100644
--- a/public/fpdf_text.h
+++ b/public/fpdf_text.h
@@ -87,6 +87,31 @@ FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index);
 FPDF_EXPORT double FPDF_CALLCONV FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
                                                       int index);
 
+// Experimental API.
+// Function: FPDFText_GetFontInfo
+//          Get the font name and flags of a particular character.
+// Parameters:
+//          text_page - Handle to a text page information structure.
+//                      Returned by FPDFText_LoadPage function.
+//          index     - Zero-based index of the character.
+//          buffer    - A buffer receiving the font name. May be NULL.
+//          buflen    - The length of |buffer| in bytes.
+//          flags     - Optional pointer to an int receiving the font flags.
+//                      These flags should be interpreted per PDF spec 1.7 Section 5.7.1
+//                      Font Descriptor Flags.
+// Return value:
+//          On success, return the length of the font name, including the
+//          trailing NUL character, in bytes. If this length is less than or
+//          equal to |buflen|, |buffer| is set to the font name. |flags|, if
+//          non-null, is set to the font flags whenever the call succeeds.
+//          |buffer| is in UTF-8 encoding. Return 0 on failure.
+FPDF_EXPORT unsigned long FPDF_CALLCONV
+FPDFText_GetFontInfo(FPDF_TEXTPAGE text_page,
+                     int index,
+                     void* buffer,
+                     unsigned long buflen,
+                     int* flags);
+
 // Function: FPDFText_GetCharBox
 //          Get bounding box of a particular character.
 // Parameters:
-- 
cgit v1.2.3