summaryrefslogtreecommitdiff
path: root/fpdfsdk
diff options
context:
space:
mode:
Diffstat (limited to 'fpdfsdk')
-rw-r--r--fpdfsdk/fpdftext.cpp21
-rw-r--r--fpdfsdk/fpdftext_embeddertest.cpp35
2 files changed, 38 insertions, 18 deletions
diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp
index 5a2deb9a81..d9f7d572e9 100644
--- a/fpdfsdk/fpdftext.cpp
+++ b/fpdfsdk/fpdftext.cpp
@@ -179,25 +179,10 @@ FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE page,
return 1;
}
- // char_* values are for a data structure that includes non-printing unicode
- // characters, where the text_* values are from a data structure that doesn't
- // include these characters, so translation is needed.
- int text_start = textpage->TextIndexFromCharIndex(char_start);
- if (text_start == -1)
- return 0;
-
- int char_last = char_start + char_count - 1;
- int text_last = textpage->TextIndexFromCharIndex(char_last);
- if (text_last == -1)
- return 0;
-
- int text_count = text_last - text_start + 1;
- if (text_count < 1)
- return 0;
+ WideString str = textpage->GetPageText(char_start, char_count);
- WideString str = textpage->GetPageText(text_start, text_count);
- if (str.GetLength() > static_cast<size_t>(text_count))
- str = str.Left(static_cast<size_t>(text_count));
+ if (str.GetLength() > static_cast<size_t>(char_count))
+ str = str.Left(static_cast<size_t>(char_count));
// UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected
// the number of items to stay the same.
diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp
index 51216b9818..60654057b9 100644
--- a/fpdfsdk/fpdftext_embeddertest.cpp
+++ b/fpdfsdk/fpdftext_embeddertest.cpp
@@ -590,3 +590,38 @@ TEST_F(FPDFTextEmbeddertest, bug_782596) {
FPDFText_ClosePage(textpage);
UnloadPage(page);
}
+
+TEST_F(FPDFTextEmbeddertest, ControlCharacters) {
+  ASSERT_TRUE(OpenDocument("control_characters.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  ASSERT_TRUE(page);
+
+  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+  ASSERT_TRUE(textpage);
+
+  // Extracted text should not include the control characters in the output.
+  static const char expected[] = "Hello, world!\r\nGoodbye, world!";
+  unsigned short fixed_buffer[128];
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer);
+  // sizeof() counts the terminating NUL, so the expected count includes it.
+  ASSERT_GE(num_chars, 0);
+  EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars));
+  EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected)));
+
+  // Attempting to get a chunk of text that starts after the control characters.
+  static const char expected_substring[] = "Goodbye, world!";
+  // Offset is the length of "Hello, world!\r\n" (15) plus the 2 control
+  // characters in the original stream.
+  static const int offset = 17;
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  num_chars = FPDFText_GetText(textpage, offset, 128, fixed_buffer);
+
+  ASSERT_GE(num_chars, 0);
+  EXPECT_EQ(sizeof(expected_substring), static_cast<size_t>(num_chars));
+  EXPECT_TRUE(check_unsigned_shorts(expected_substring, fixed_buffer,
+                                    sizeof(expected_substring)));
+
+  FPDFText_ClosePage(textpage);
+  UnloadPage(page);
+}