Leave space for null characters when getting text

The conversion from WideString to ByteString appends null characters at the end, so we need to account for these when selecting the range of text to initially extract. BUG=chromium:761770,chromium:761626 Change-Id: Ib8f863e997ebccaaf882e0beb29733f27a18826d Reviewed-on: https://pdfium-review.googlesource.com/13110 Commit-Queue: Ryan Harrison <rharrison@chromium.org> Reviewed-by: dsinclair <dsinclair@chromium.org>
author: Ryan Harrison <rharrison@chromium.org> 2017-09-05 11:48:55 -0400
committer: Chromium commit bot <commit-bot@chromium.org> 2017-09-05 16:20:37 +0000
commit: 2bf05a6ca144e78223795ae1716875d3c9b8acb1 (patch)
tree: 2a851799ac9cfac5b283a1284ddf96215da6602d
parent: 1c9ad7ec7353cccabb6881fd22b116daa3dcbb84 (diff)
download: pdfium-2bf05a6ca144e78223795ae1716875d3c9b8acb1.tar.xz
2 files changed, 18 insertions, 7 deletions
diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp
index 6df593f710..6a030b8ebd 100644
--- a/fpdfsdk/fpdftext.cpp
+++ b/fpdfsdk/fpdftext.cpp
@@ -29,6 +29,8 @@
 
 namespace {
 
+constexpr size_t kBytesPerCharacter = sizeof(unsigned short);
+
 CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) {
   return static_cast<CPDF_TextPage*>(text_page);
 }
@@ -169,19 +171,19 @@ FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE text_page,
   if (start >= textpage->CountChars())
     return 0;
 
-  CFX_WideString str = textpage->GetPageText(start, count);
+  CFX_WideString str = textpage->GetPageText(start, count - 1);
   if (str.GetLength() <= 0)
     return 0;
 
   // UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected
   // the number of items to stay the same.
   CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
-  ASSERT(cbUTF16str.GetLength() / sizeof(unsigned short) <=
+  ASSERT(cbUTF16str.GetLength() / kBytesPerCharacter <=
          static_cast<size_t>(count));
   memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
          cbUTF16str.GetLength());
 
-  return cbUTF16str.GetLength() / sizeof(unsigned short);
+  return cbUTF16str.GetLength() / kBytesPerCharacter;
 }
 
 FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountRects(FPDF_TEXTPAGE text_page,
diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp
index a83ffe7c29..7e8e033ae0 100644
--- a/fpdfsdk/fpdftext_embeddertest.cpp
+++ b/fpdfsdk/fpdftext_embeddertest.cpp
@@ -16,13 +16,12 @@ namespace {
 bool check_unsigned_shorts(const char* expected,
                            const unsigned short* actual,
                            size_t length) {
-  if (length > strlen(expected) + 1) {
+  if (length > strlen(expected) + 1)
     return false;
-  }
+
   for (size_t i = 0; i < length; ++i) {
-    if (actual[i] != static_cast<unsigned short>(expected[i])) {
+    if (actual[i] != static_cast<unsigned short>(expected[i]))
       return false;
-    }
   }
   return true;
 }
@@ -64,6 +63,16 @@ TEST_F(FPDFTextEmbeddertest, Text) {
         << " at " << i;
   }
 
+  // Extracting using a buffer that will be completely filled. Small buffer is
+  // 12 elements long, since it will need 2 locations per displayed character in
+  // the expected string, plus 2 more for the terminating character.
+  static const char small_expected[] = "Hello";
+  unsigned short small_buffer[12];
+  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
+  EXPECT_EQ(6, FPDFText_GetText(textpage, 0, 6, small_buffer));
+  EXPECT_TRUE(check_unsigned_shorts(small_expected, small_buffer,
+                                    sizeof(small_expected)));
+
   EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0));
   EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15));
author	Ryan Harrison <rharrison@chromium.org>	2017-09-05 11:48:55 -0400
committer	Chromium commit bot <commit-bot@chromium.org>	2017-09-05 16:20:37 +0000
commit	2bf05a6ca144e78223795ae1716875d3c9b8acb1 (patch)
tree	2a851799ac9cfac5b283a1284ddf96215da6602d
parent	1c9ad7ec7353cccabb6881fd22b116daa3dcbb84 (diff)
download	pdfium-2bf05a6ca144e78223795ae1716875d3c9b8acb1.tar.xz