From c5ac05726a38d214d399f7be42811d659f9f9d9a Mon Sep 17 00:00:00 2001 From: Ryan Harrison Date: Thu, 31 Aug 2017 16:37:48 -0400 Subject: Make FPDF_GetText stricter on inputs The current implementation of this function is problematic. It will attempt to memcpy to NULL. It will accept obviously wrong inputs like a negative start index. It will also accept -1 for the count, which in theory is the amount of space the buffer has allocated to it, so doesn't make sense, but instead an internal call will calculate the number of characters to get if the count is -1. This will them lead to the function attempting to call Left(-1) on a string, which is invalid. Ths documentation for this function mentions none of this behaviour, so I am removing it, since it is inconsistent/bad. The implementation should now more strictly meet defined API. BUG=pdfium:828 Change-Id: I18afdb33e12d77c10d856b4bacd615481979c484 Reviewed-on: https://pdfium-review.googlesource.com/12733 Commit-Queue: Ryan Harrison Reviewed-by: Tom Sepez --- fpdfsdk/fpdftext.cpp | 11 +++++++---- fpdfsdk/fpdftext_embeddertest.cpp | 6 ++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp index 8ed76ea7c1..6df593f710 100644 --- a/fpdfsdk/fpdftext.cpp +++ b/fpdfsdk/fpdftext.cpp @@ -162,7 +162,7 @@ FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE text_page, int start, int count, unsigned short* result) { - if (!text_page) + if (start < 0 || count < 1 || !result || !text_page) return 0; CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); @@ -170,13 +170,16 @@ FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE text_page, return 0; CFX_WideString str = textpage->GetPageText(start, count); - if (str.GetLength() > count) - str = str.Left(count); + if (str.GetLength() <= 0) + return 0; + // UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected + // the number of items to stay the same. CFX_ByteString cbUTF16str = str.UTF16LE_Encode(); + ASSERT(cbUTF16str.GetLength() / sizeof(unsigned short) <= + static_cast(count)); memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()), cbUTF16str.GetLength()); - cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength()); return cbUTF16str.GetLength() / sizeof(unsigned short); } diff --git a/fpdfsdk/fpdftext_embeddertest.cpp b/fpdfsdk/fpdftext_embeddertest.cpp index e662bbe6e5..a83ffe7c29 100644 --- a/fpdfsdk/fpdftext_embeddertest.cpp +++ b/fpdfsdk/fpdftext_embeddertest.cpp @@ -43,6 +43,12 @@ TEST_F(FPDFTextEmbeddertest, Text) { unsigned short fixed_buffer[128]; memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); + // Check that unreasonable inputs are handled gracefully + EXPECT_EQ(0, FPDFText_GetText(textpage, 0, 128, nullptr)); + EXPECT_EQ(0, FPDFText_GetText(textpage, -1, 128, fixed_buffer)); + EXPECT_EQ(0, FPDFText_GetText(textpage, 0, 0, fixed_buffer)); + EXPECT_EQ(0, FPDFText_GetText(textpage, 0, -1, fixed_buffer)); + // Check includes the terminating NUL that is provided. int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer); ASSERT_GE(num_chars, 0); -- cgit v1.2.3