summary | refs | log | tree | commit | diff
path: root/fpdfsdk
diff options
context:
space:
mode:
author: Lei Zhang <thestig@chromium.org> 2018-06-08 15:31:10 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-06-08 15:31:10 +0000
commit: 49fa50d7e922746c02f7b70c8436466d7f62696a (patch)
tree: 6a66f2a89d5f4c18cd5c507b02a79ae97f18c9c7 /fpdfsdk
parent: a0608aaa5765da478af969db50a30a84f0cc3813 (diff)
download: pdfium-49fa50d7e922746c02f7b70c8436466d7f62696a.tar.xz
Use FPDFText_GetBoundedText() to get the visible text in a test.
Add a test PDF with multiple pages, each with a different media box and crop box. Demonstrate how FPDFText_GetText() gets all the text on the page, and how FPDFText_GetBoundedText() with the right bounding boxes gets only the visible text on the page.

Also fix a small nit in CPDF_TextPage::GetTextByRect() found while writing this CL.

BUG=pdfium:387
Change-Id: I9ce4bb181e2ba5b454ea1341bbccef9ba94c9cd8
Reviewed-on: https://pdfium-review.googlesource.com/34550
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Diffstat (limited to 'fpdfsdk')
-rw-r--r-- fpdfsdk/fpdf_text_embeddertest.cpp 61
1 files changed, 60 insertions, 1 deletions
diff --git a/fpdfsdk/fpdf_text_embeddertest.cpp b/fpdfsdk/fpdf_text_embeddertest.cpp
index c7ad8250df..112991f896 100644
--- a/fpdfsdk/fpdf_text_embeddertest.cpp
+++ b/fpdfsdk/fpdf_text_embeddertest.cpp
@@ -2,10 +2,13 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include <algorithm>
#include <memory>
+#include <utility>
#include "core/fxcrt/fx_memory.h"
#include "public/fpdf_text.h"
+#include "public/fpdf_transformpage.h"
#include "public/fpdfview.h"
#include "testing/embedder_test.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -159,7 +162,8 @@ TEST_F(FPDFTextEmbeddertest, Text) {
EXPECT_EQ(0.0, bottom);
EXPECT_EQ(0.0, top);
- EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 0, 0));
+ EXPECT_EQ(
+ 9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, nullptr, 0));
// Extract starting at character 4 as above.
memset(buffer, 0xbd, sizeof(buffer));
@@ -751,3 +755,58 @@ TEST_F(FPDFTextEmbeddertest, CountRects) {
FPDFText_ClosePage(textpage);
UnloadPage(page);
}
+
+TEST_F(FPDFTextEmbeddertest, CroppedText) {
+ static constexpr int kPageCount = 4;
+ static constexpr FS_RECTF kBoxes[kPageCount] = {
+ {50.0f, 150.0f, 150.0f, 50.0f},
+ {50.0f, 150.0f, 150.0f, 50.0f},
+ {60.0f, 150.0f, 150.0f, 60.0f},
+ {60.0f, 150.0f, 150.0f, 60.0f},
+ };
+ static constexpr const char* kExpectedText[kPageCount] = {
+ " world!\r\ndbye, world!", " world!\r\ndbye, world!", "bye, world!",
+ "bye, world!",
+ };
+
+ ASSERT_TRUE(OpenDocument("cropped_text.pdf"));
+ ASSERT_EQ(kPageCount, FPDF_GetPageCount(document()));
+
+ for (int i = 0; i < kPageCount; ++i) {
+ FPDF_PAGE page = LoadPage(i);
+ ASSERT_TRUE(page);
+
+ FS_RECTF box;
+ EXPECT_TRUE(FPDF_GetPageBoundingBox(page, &box));
+ EXPECT_EQ(kBoxes[i].left, box.left);
+ EXPECT_EQ(kBoxes[i].top, box.top);
+ EXPECT_EQ(kBoxes[i].right, box.right);
+ EXPECT_EQ(kBoxes[i].bottom, box.bottom);
+
+ {
+ ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
+ ASSERT_TRUE(textpage);
+
+ unsigned short buffer[128];
+ memset(buffer, 0xbd, sizeof(buffer));
+ int num_chars = FPDFText_GetText(textpage.get(), 0, 128, buffer);
+ ASSERT_EQ(kHelloGoodbyeTextSize, num_chars);
+ EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText, buffer,
+ kHelloGoodbyeTextSize));
+
+ int expected_char_count = strlen(kExpectedText[i]);
+ ASSERT_EQ(expected_char_count,
+ FPDFText_GetBoundedText(textpage.get(), box.left, box.top,
+ box.right, box.bottom, nullptr, 0));
+
+ memset(buffer, 0xbd, sizeof(buffer));
+ ASSERT_EQ(expected_char_count + 1,
+ FPDFText_GetBoundedText(textpage.get(), box.left, box.top,
+ box.right, box.bottom, buffer, 128));
+ EXPECT_TRUE(
+ check_unsigned_shorts(kExpectedText[i], buffer, expected_char_count));
+ }
+
+ UnloadPage(page);
+ }
+}