diff options
Diffstat (limited to 'fpdfsdk')
-rw-r--r-- | fpdfsdk/fpdftext.cpp | 21 | ||||
-rw-r--r-- | fpdfsdk/fpdftext_embeddertest.cpp | 35 |
2 files changed, 38 insertions, 18 deletions
diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp
index 5a2deb9a81..d9f7d572e9 100644
--- a/fpdfsdk/fpdftext.cpp
+++ b/fpdfsdk/fpdftext.cpp
@@ -179,25 +179,10 @@ FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE page,
     return 1;
   }
 
-  // char_* values are for a data structure that includes non-printing unicode
-  // characters, where the text_* values are from a data structure that doesn't
-  // include these characters, so translation is needed.
-  int text_start = textpage->TextIndexFromCharIndex(char_start);
-  if (text_start == -1)
-    return 0;
-
-  int char_last = char_start + char_count - 1;
-  int text_last = textpage->TextIndexFromCharIndex(char_last);
-  if (text_last == -1)
-    return 0;
-
-  int text_count = text_last - text_start + 1;
-  if (text_count < 1)
-    return 0;
+  WideString str = textpage->GetPageText(char_start, char_count);
 
-  WideString str = textpage->GetPageText(text_start, text_count);
-  if (str.GetLength() > static_cast<size_t>(text_count))
-    str = str.Left(static_cast<size_t>(text_count));
+  if (str.GetLength() > static_cast<size_t>(char_count))
+    str = str.Left(static_cast<size_t>(char_count));
 
   // UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected
   // the number of items to stay the same.
diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp
index 51216b9818..60654057b9 100644
--- a/fpdfsdk/fpdftext_embeddertest.cpp
+++ b/fpdfsdk/fpdftext_embeddertest.cpp
@@ -590,3 +590,38 @@ TEST_F(FPDFTextEmbeddertest, bug_782596) {
   FPDFText_ClosePage(textpage);
   UnloadPage(page);
 }
+
+TEST_F(FPDFTextEmbeddertest, ControlCharacters) {
+  EXPECT_TRUE(OpenDocument("control_characters.pdf"));
+  FPDF_PAGE page = LoadPage(0);
+  EXPECT_TRUE(page);
+
+  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
+  EXPECT_TRUE(textpage);
+
+  // Should not include the control characters in the output
+  static const char expected[] = "Hello, world!\r\nGoodbye, world!";
+  unsigned short fixed_buffer[128];
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer);
+
+  ASSERT_GE(num_chars, 0);
+  EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars));
+  EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected)));
+
+  // Attempting to get a chunk of text after the control characters
+  static const char expected_substring[] = "Goodbye, world!";
+  // Offset is the length of 'Hello, world!\r\n' + 2 control characters in the
+  // original stream
+  static const int offset = 17;
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  num_chars = FPDFText_GetText(textpage, offset, 128, fixed_buffer);
+
+  ASSERT_GE(num_chars, 0);
+  EXPECT_EQ(sizeof(expected_substring), static_cast<size_t>(num_chars));
+  EXPECT_TRUE(check_unsigned_shorts(expected_substring, fixed_buffer,
+                                    sizeof(expected_substring)));
+
+  FPDFText_ClosePage(textpage);
+  UnloadPage(page);
+}