summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fpdfsdk/fpdftext.cpp46
1 files changed, 35 insertions, 11 deletions
diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp
index 9d7d56311f..022b7caa8d 100644
--- a/fpdfsdk/fpdftext.cpp
+++ b/fpdfsdk/fpdftext.cpp
@@ -160,27 +160,51 @@ FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
static_cast<float>(yTolerance)));
}
-FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE text_page,
- int start,
- int count,
+FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE page,
+ int char_start,
+ int char_count,
unsigned short* result) {
- if (start < 0 || count < 0 || !result || !text_page)
+ if (char_start < 0 || char_count < 0 || !result || !page)
return 0;
- CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
- if (start >= textpage->CountChars())
+ CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(page);
+ if (char_start >= textpage->CountChars())
+ return 0;
+
+ char_count = std::min(char_count, textpage->CountChars() - char_start);
+ if (char_count == 0) {
+ // Writting out ""
+ *result = 0;
+ return 1;
+ }
+
+ int char_last = char_start + char_count - 1;
+
+ // char_* values are for a data structure that includes non-printing unicode
+ // characters, where the text_* values are from a data structure that doesn't
+ // include these characters, so translation is needed.
+ int text_start = textpage->TextIndexFromCharIndex(char_start);
+ if (text_start == -1)
+ return 0;
+
+ int text_last = textpage->TextIndexFromCharIndex(char_last);
+ if (text_last == -1)
+ return 0;
+
+ if (text_start > text_last)
return 0;
+ int text_count = text_last - text_start + 1;
- WideString str = textpage->GetPageText(start, count);
- if (str.GetLength() > static_cast<size_t>(count))
- str = str.Left(static_cast<size_t>(count));
+ WideString str = textpage->GetPageText(text_start, text_count);
+ if (str.GetLength() > static_cast<size_t>(text_count))
+ str = str.Left(static_cast<size_t>(text_count));
// UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected
// the number of items to stay the same.
ByteString byte_str = str.UTF16LE_Encode();
ASSERT((byte_str.GetLength()) / kBytesPerCharacter <=
- static_cast<size_t>(count + 1)); // +1 to account for the string
- // terminator
+ static_cast<size_t>(char_count + 1)); // +1 to account for the string
+ // terminator
memcpy(result, byte_str.GetBuffer(byte_str.GetLength()),
byte_str.GetLength());
return (byte_str.GetLength() / kBytesPerCharacter);