From 3fc7fe5e4d8fa257e35e6ae86fc6cf4d6b5016a2 Mon Sep 17 00:00:00 2001 From: Ryan Harrison Date: Mon, 27 Nov 2017 19:30:17 +0000 Subject: Change FPDF_GetText to return "" when asked to get 0 characters BUG=chromium:788103 Change-Id: I8ebdbc78eb14c358d7ac019b96de4828e6071b79 Reviewed-on: https://pdfium-review.googlesource.com/19350 Commit-Queue: Ryan Harrison Reviewed-by: dsinclair --- core/fxcrt/widestring.h | 4 ++++ fpdfsdk/fpdftext.cpp | 21 +++++++++------------ fpdfsdk/fpdftext_embeddertest.cpp | 27 +++++++++++++-------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/core/fxcrt/widestring.h b/core/fxcrt/widestring.h index f48c32379a..14621ec1a1 100644 --- a/core/fxcrt/widestring.h +++ b/core/fxcrt/widestring.h @@ -182,6 +182,10 @@ class WideString { size_t Remove(wchar_t ch); ByteString UTF8Encode() const; + + // This method will add \0\0 to the end of the string to represent the + // wide string terminator. These values are in the string, not just the data, + // so GetLength() will include them. ByteString UTF16LE_Encode() const; protected: diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp index ae6841febc..9d7d56311f 100644 --- a/fpdfsdk/fpdftext.cpp +++ b/fpdfsdk/fpdftext.cpp @@ -164,29 +164,26 @@ FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE text_page, int start, int count, unsigned short* result) { - if (start < 0 || count < 1 || !result || !text_page) + if (start < 0 || count < 0 || !result || !text_page) return 0; CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); if (start >= textpage->CountChars()) return 0; - WideString str = textpage->GetPageText(start, count - 1); - if (str.GetLength() <= 0) - return 0; - + WideString str = textpage->GetPageText(start, count); if (str.GetLength() > static_cast(count)) str = str.Left(static_cast(count)); // UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected // the number of items to stay the same. - ByteString cbUTF16str = str.UTF16LE_Encode(); - ASSERT(cbUTF16str.GetLength() / kBytesPerCharacter <= - static_cast(count)); - memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()), - cbUTF16str.GetLength()); - - return cbUTF16str.GetLength() / kBytesPerCharacter; + ByteString byte_str = str.UTF16LE_Encode(); + ASSERT((byte_str.GetLength()) / kBytesPerCharacter <= + static_cast(count + 1)); // +1 to account for the string + // terminator + memcpy(result, byte_str.GetBuffer(byte_str.GetLength()), + byte_str.GetLength()); + return (byte_str.GetLength() / kBytesPerCharacter); } FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountRects(FPDF_TEXTPAGE text_page, diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp index a4431097f1..4b82dc081e 100644 --- a/fpdfsdk/fpdftext_embeddertest.cpp +++ b/fpdfsdk/fpdftext_embeddertest.cpp @@ -42,11 +42,12 @@ TEST_F(FPDFTextEmbeddertest, Text) { unsigned short fixed_buffer[128]; memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); - // Check that unreasonable inputs are handled gracefully + // Check that edge cases are handled gracefully EXPECT_EQ(0, FPDFText_GetText(textpage, 0, 128, nullptr)); EXPECT_EQ(0, FPDFText_GetText(textpage, -1, 128, fixed_buffer)); - EXPECT_EQ(0, FPDFText_GetText(textpage, 0, 0, fixed_buffer)); EXPECT_EQ(0, FPDFText_GetText(textpage, 0, -1, fixed_buffer)); + EXPECT_EQ(1, FPDFText_GetText(textpage, 0, 0, fixed_buffer)); + EXPECT_EQ(0, fixed_buffer[0]); // Check includes the terminating NUL that is provided. int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer); @@ -69,7 +70,7 @@ TEST_F(FPDFTextEmbeddertest, Text) { static const char small_expected[] = "Hello"; unsigned short small_buffer[12]; memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); - EXPECT_EQ(6, FPDFText_GetText(textpage, 0, 6, small_buffer)); + EXPECT_EQ(6, FPDFText_GetText(textpage, 0, 5, small_buffer)); EXPECT_TRUE(check_unsigned_shorts(small_expected, small_buffer, sizeof(small_expected))); @@ -515,7 +516,7 @@ TEST_F(FPDFTextEmbeddertest, Bug_921) { unsigned short buffer[FX_ArraySize(kData) + 1]; memset(buffer, 0xbd, sizeof(buffer)); int count = - FPDFText_GetText(textpage, kStartIndex, FX_ArraySize(buffer), buffer); + FPDFText_GetText(textpage, kStartIndex, FX_ArraySize(kData), buffer); ASSERT_GT(count, 0); ASSERT_EQ(FX_ArraySize(kData) + 1, static_cast(count)); for (size_t i = 0; i < FX_ArraySize(kData); ++i) @@ -542,13 +543,12 @@ TEST_F(FPDFTextEmbeddertest, GetTextWithHyphen) { 0x0056, 0x0065, 0x0072, 0x0069, 0x0074, 0x0061, 0xfffe, 0x0073, 0x0065, 0x0072, 0x0075, 0x006D, 0x0000}; { - constexpr int expected_count = FX_ArraySize(soft_expected); - unsigned short buffer[expected_count]; + constexpr int count = FX_ArraySize(soft_expected) - 1; + unsigned short buffer[FX_ArraySize(soft_expected)]; memset(buffer, 0, sizeof(buffer)); - EXPECT_EQ(expected_count, - FPDFText_GetText(textpage, 0, expected_count, buffer)); - for (int i = 0; i < expected_count; i++) + EXPECT_EQ(count + 1, FPDFText_GetText(textpage, 0, count, buffer)); + for (int i = 0; i < count; i++) EXPECT_EQ(soft_expected[i], buffer[i]); } @@ -562,12 +562,11 @@ TEST_F(FPDFTextEmbeddertest, GetTextWithHyphen) { constexpr unsigned short hard_expected[] = { 0x0055, 0x0073, 0x0065, 0x0072, 0x2010, 0x000d, 0x000a, 0x0067, 0x0065, 0x006e, 0x0065, 0x0072, 0x0061, 0x0074, 0x0065, 0x0064, 0x0000}; - constexpr int expected_count = FX_ArraySize(hard_expected); - unsigned short buffer[expected_count]; + constexpr int count = FX_ArraySize(hard_expected) - 1; + unsigned short buffer[FX_ArraySize(hard_expected)]; - EXPECT_EQ(expected_count, - FPDFText_GetText(textpage, offset, expected_count, buffer)); - for (int i = 0; i < expected_count; i++) + EXPECT_EQ(count + 1, FPDFText_GetText(textpage, offset, count, buffer)); + for (int i = 0; i < count; i++) EXPECT_EQ(hard_expected[i], buffer[i]); } -- cgit v1.2.3