summaryrefslogtreecommitdiff
path: root/fpdfsdk
diff options
context:
space:
mode:
author: Miklos Vajna <vmiklos@collabora.co.uk> 2018-08-07 21:45:34 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-08-07 21:45:34 +0000
commit: 3bee9c60f013b8b7e99c39ee35699d132b330334 (patch)
tree: fc00b16bf5c6b84af3c4683e43a2652b80db173b /fpdfsdk
parent: e1c2f6d7fe7a50280161832799550a3ee8f98088 (diff)
download: pdfium-3bee9c60f013b8b7e99c39ee35699d132b330334.tar.xz
Add FPDFTextObj_GetText() API
Generalize CPDF_TextPage::GetTextByRect() so that it is possible to get the text from a text page using a predicate; that way we can easily get the text that belongs to a single text object as well. Change-Id: Ia457af0f41184694dc1481709be72b35685bce7f Reviewed-on: https://pdfium-review.googlesource.com/39530 Reviewed-by: Henrique Nakashima <hnakashima@chromium.org> Reviewed-by: Lei Zhang <thestig@chromium.org> Commit-Queue: Lei Zhang <thestig@chromium.org>
Diffstat (limited to 'fpdfsdk')
-rw-r--r-- fpdfsdk/fpdf_edittext.cpp 18
-rw-r--r-- fpdfsdk/fpdf_text_embeddertest.cpp 45
-rw-r--r-- fpdfsdk/fpdf_view_c_api_test.c 1
3 files changed, 64 insertions, 0 deletions
diff --git a/fpdfsdk/fpdf_edittext.cpp b/fpdfsdk/fpdf_edittext.cpp
index 6aa44b3b20..2773763b9a 100644
--- a/fpdfsdk/fpdf_edittext.cpp
+++ b/fpdfsdk/fpdf_edittext.cpp
@@ -22,6 +22,7 @@
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fpdftext/cpdf_textpage.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxge/cfx_fontmgr.h"
#include "core/fxge/fx_font.h"
@@ -564,6 +565,23 @@ FPDFTextObj_GetFontName(FPDF_PAGEOBJECT text,
return dwStringLen;
}
+FPDF_EXPORT unsigned long FPDF_CALLCONV  // Fetches the text of |text_object| by looking it up on |text_page|.
+FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object,
+ FPDF_TEXTPAGE text_page,
+ void* buffer,  // Passed through untouched to the UTF-16 encode helper below.
+ unsigned long length) {  // Size of |buffer| as that helper understands it (presumably bytes; confirm).
+ CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text_object);
+ if (!pTextObj)
+ return 0;  // No text object could be obtained from the handle.
+ // The text is looked up through the text page, so that handle has to resolve as well.
+ CPDF_TextPage* pTextPage = CPDFTextPageFromFPDFTextPage(text_page);
+ if (!pTextPage)
+ return 0;  // No text page could be obtained from the handle.
+ // Ask the page for this object's text; the helper's result is returned unchanged.
+ WideString text = pTextPage->GetTextByObject(pTextObj);
+ return Utf16EncodeMaybeCopyAndReturnLength(text, buffer, length);
+}
+
FPDF_EXPORT void FPDF_CALLCONV FPDFFont_Close(FPDF_FONT font) {
CPDF_Font* pFont = CPDFFontFromFPDFFont(font);
if (!pFont)
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index eafe1a2d11..bf064d672a 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -5,6 +5,7 @@
#include <algorithm>
#include <memory>
#include <utility>
+#include <vector>
#include "core/fxcrt/fx_memory.h"
#include "public/cpp/fpdf_scopers.h"
@@ -762,6 +763,50 @@ TEST_F(FPDFTextEmbeddertest, CountRects) {
UnloadPage(page);
}
+TEST_F(FPDFTextEmbeddertest, GetText) {  // Covers FPDFTextObj_GetText() using hello_world.pdf.
+ ASSERT_TRUE(OpenDocument("hello_world.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+ // The API under test takes a text page handle in addition to the page object.
+ FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
+ ASSERT_TRUE(text_page);
+ // Two page objects are expected; the one at index 0 is used as the text object.
+ EXPECT_EQ(2, FPDFPage_CountObjects(page));
+ FPDF_PAGEOBJECT text_object = FPDFPage_GetObject(page, 0);
+ ASSERT_TRUE(text_object);
+
+ // Positive testing: query the size with a null buffer, then fetch the text.
+ constexpr char kHelloText[] = "Hello, world!";
+ // Return value includes the terminating NUL, hence FX_ArraySize() on the literal; the factor of 2 is presumably bytes per UTF-16 code unit (confirm).
+ constexpr unsigned long kHelloUTF16Size = FX_ArraySize(kHelloText) * 2;
+ constexpr wchar_t kHelloWideText[] = L"Hello, world!";
+ unsigned long size = FPDFTextObj_GetText(text_object, text_page, nullptr, 0);
+ ASSERT_EQ(kHelloUTF16Size, size);
+ // Second call: a buffer of |size| unsigned shorts, with |size| passed as the length.
+ std::vector<unsigned short> buffer(size);
+ ASSERT_EQ(size,
+ FPDFTextObj_GetText(text_object, text_page, buffer.data(), size));
+ ASSERT_EQ(kHelloWideText, GetPlatformWString(buffer.data()));
+
+ // Negative testing: a null text object and/or a null text page must yield 0.
+ ASSERT_EQ(0U, FPDFTextObj_GetText(nullptr, text_page, nullptr, 0));
+ ASSERT_EQ(0U, FPDFTextObj_GetText(text_object, nullptr, nullptr, 0));
+ ASSERT_EQ(0U, FPDFTextObj_GetText(nullptr, nullptr, nullptr, 0));
+
+ // Buffer is too small: the full size must still be returned and the buffer left unmodified.
+ buffer.resize(2);
+ buffer[0] = 'x';
+ buffer[1] = '\0';
+ size =
+ FPDFTextObj_GetText(text_object, text_page, buffer.data(), buffer.size());
+ ASSERT_EQ(kHelloUTF16Size, size);
+ ASSERT_EQ('x', buffer[0]);
+ ASSERT_EQ('\0', buffer[1]);
+ // Tear down in reverse order of setup: the text page first, then the page.
+ FPDFText_ClosePage(text_page);
+ UnloadPage(page);
+}
+
TEST_F(FPDFTextEmbeddertest, CroppedText) {
static constexpr int kPageCount = 4;
static constexpr FS_RECTF kBoxes[kPageCount] = {
diff --git a/fpdfsdk/fpdf_view_c_api_test.c b/fpdfsdk/fpdf_view_c_api_test.c
index 91d24fd233..56e9d7abed 100644
--- a/fpdfsdk/fpdf_view_c_api_test.c
+++ b/fpdfsdk/fpdf_view_c_api_test.c
@@ -206,6 +206,7 @@ int CheckPDFiumCApi() {
CHK(FPDFPath_SetStrokeWidth);
CHK(FPDFTextObj_GetFontName);
CHK(FPDFTextObj_GetFontSize);
+ CHK(FPDFTextObj_GetText);
CHK(FPDFText_GetMatrix);
CHK(FPDFText_GetTextRenderMode);
CHK(FPDFText_LoadFont);