From 2bf05a6ca144e78223795ae1716875d3c9b8acb1 Mon Sep 17 00:00:00 2001 From: Ryan Harrison Date: Tue, 5 Sep 2017 11:48:55 -0400 Subject: Leave space for null characters when getting text The conversion from WideString to ByteString adds in null characters at the end, so we need to account for these when selecting the range of text to initially extract. BUG=chromium:761770,chromium:761626 Change-Id: Ib8f863e997ebccaaf882e0beb29733f27a18826d Reviewed-on: https://pdfium-review.googlesource.com/13110 Commit-Queue: Ryan Harrison Reviewed-by: dsinclair --- fpdfsdk/fpdftext.cpp | 8 +++++--- fpdfsdk/fpdftext_embeddertest.cpp | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp index 6df593f710..6a030b8ebd 100644 --- a/fpdfsdk/fpdftext.cpp +++ b/fpdfsdk/fpdftext.cpp @@ -29,6 +29,8 @@ namespace { +constexpr size_t kBytesPerCharacter = sizeof(unsigned short); + CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) { return static_cast(text_page); } @@ -169,19 +171,19 @@ FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE text_page, if (start >= textpage->CountChars()) return 0; - CFX_WideString str = textpage->GetPageText(start, count); + CFX_WideString str = textpage->GetPageText(start, count - 1); if (str.GetLength() <= 0) return 0; // UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected // the number of items to stay the same. 
CFX_ByteString cbUTF16str = str.UTF16LE_Encode(); - ASSERT(cbUTF16str.GetLength() / sizeof(unsigned short) <= + ASSERT(cbUTF16str.GetLength() / kBytesPerCharacter <= static_cast(count)); memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()), cbUTF16str.GetLength()); - return cbUTF16str.GetLength() / sizeof(unsigned short); + return cbUTF16str.GetLength() / kBytesPerCharacter; } FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountRects(FPDF_TEXTPAGE text_page, diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp index a83ffe7c29..7e8e033ae0 100644 --- a/fpdfsdk/fpdftext_embeddertest.cpp +++ b/fpdfsdk/fpdftext_embeddertest.cpp @@ -16,13 +16,12 @@ namespace { bool check_unsigned_shorts(const char* expected, const unsigned short* actual, size_t length) { - if (length > strlen(expected) + 1) { + if (length > strlen(expected) + 1) return false; - } + for (size_t i = 0; i < length; ++i) { - if (actual[i] != static_cast(expected[i])) { + if (actual[i] != static_cast(expected[i])) return false; - } } return true; } @@ -64,6 +63,16 @@ TEST_F(FPDFTextEmbeddertest, Text) { << " at " << i; } + // Extracting using a buffer that will be completely filled. Small buffer is + // 12 elements long, since it will need 2 locations per displayed character in + // the expected string, plus 2 more for the terminating character. + static const char small_expected[] = "Hello"; + unsigned short small_buffer[12]; + memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); + EXPECT_EQ(6, FPDFText_GetText(textpage, 0, 6, small_buffer)); + EXPECT_TRUE(check_unsigned_shorts(small_expected, small_buffer, + sizeof(small_expected))); + EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0)); EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15)); -- cgit v1.2.3