diff options
author | Ryan Harrison <rharrison@chromium.org> | 2017-09-05 11:48:55 -0400 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-09-05 16:20:37 +0000 |
commit | 2bf05a6ca144e78223795ae1716875d3c9b8acb1 (patch) | |
tree | 2a851799ac9cfac5b283a1284ddf96215da6602d | |
parent | 1c9ad7ec7353cccabb6881fd22b116daa3dcbb84 (diff) | |
download | pdfium-2bf05a6ca144e78223795ae1716875d3c9b8acb1.tar.xz |
Leave space for null characters when getting text
The conversion from WideString to ByteString adds in null characters at
the end, so we need to account for these when selecting the range of
text to initially extract.
BUG=chromium:761770,chromium:761626
Change-Id: Ib8f863e997ebccaaf882e0beb29733f27a18826d
Reviewed-on: https://pdfium-review.googlesource.com/13110
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
-rw-r--r-- | fpdfsdk/fpdftext.cpp | 8 | ||||
-rw-r--r-- | fpdfsdk/fpdftext_embeddertest.cpp | 17 |
2 files changed, 18 insertions, 7 deletions
diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp index 6df593f710..6a030b8ebd 100644 --- a/fpdfsdk/fpdftext.cpp +++ b/fpdfsdk/fpdftext.cpp @@ -29,6 +29,8 @@ namespace { +constexpr size_t kBytesPerCharacter = sizeof(unsigned short); + CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) { return static_cast<CPDF_TextPage*>(text_page); } @@ -169,19 +171,19 @@ FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE text_page, if (start >= textpage->CountChars()) return 0; - CFX_WideString str = textpage->GetPageText(start, count); + CFX_WideString str = textpage->GetPageText(start, count - 1); if (str.GetLength() <= 0) return 0; // UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected // the number of items to stay the same. CFX_ByteString cbUTF16str = str.UTF16LE_Encode(); - ASSERT(cbUTF16str.GetLength() / sizeof(unsigned short) <= + ASSERT(cbUTF16str.GetLength() / kBytesPerCharacter <= static_cast<size_t>(count)); memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()), cbUTF16str.GetLength()); - return cbUTF16str.GetLength() / sizeof(unsigned short); + return cbUTF16str.GetLength() / kBytesPerCharacter; } FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountRects(FPDF_TEXTPAGE text_page, diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp index a83ffe7c29..7e8e033ae0 100644 --- a/fpdfsdk/fpdftext_embeddertest.cpp +++ b/fpdfsdk/fpdftext_embeddertest.cpp @@ -16,13 +16,12 @@ namespace { bool check_unsigned_shorts(const char* expected, const unsigned short* actual, size_t length) { - if (length > strlen(expected) + 1) { + if (length > strlen(expected) + 1) return false; - } + for (size_t i = 0; i < length; ++i) { - if (actual[i] != static_cast<unsigned short>(expected[i])) { + if (actual[i] != static_cast<unsigned short>(expected[i])) return false; - } } return true; } @@ -64,6 +63,16 @@ TEST_F(FPDFTextEmbeddertest, Text) { << " at " << i; } + // Extracting using a buffer that will be completely 
filled. Small buffer is + // 12 elements long, since it will need 2 locations per displayed character in + // the expected string, plus 2 more for the terminating character. + static const char small_expected[] = "Hello"; + unsigned short small_buffer[12]; + memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); + EXPECT_EQ(6, FPDFText_GetText(textpage, 0, 6, small_buffer)); + EXPECT_TRUE(check_unsigned_shorts(small_expected, small_buffer, + sizeof(small_expected))); + EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0)); EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15)); |