summary refs log tree commit diff
path: root/core/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext')
-rw-r--r-- core/fpdftext/cpdf_linkextract.cpp 2
-rw-r--r-- core/fpdftext/cpdf_textpage.cpp 56
-rw-r--r-- core/fpdftext/cpdf_textpage.h 8
-rw-r--r-- core/fpdftext/cpdf_textpagefind.cpp 4
4 files changed, 27 insertions, 43 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index 3a38343721..05cbdfb3a7 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp
@@ -114,7 +114,7 @@ void CPDF_LinkExtract::ExtractLinks() {
if (!m_pTextPage->IsParsed())
return;
- m_strPageText = m_pTextPage->GetPageText(0, -1);
+ m_strPageText = m_pTextPage->GetAllPageText();
if (m_strPageText.IsEmpty())
return;
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index 7ea2061c26..8ef5522bae 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -436,49 +436,27 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start,
}
}
-WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
- if (!m_bIsParsed || nCount == 0)
+WideString CPDF_TextPage::GetPageText(int start, int count) const {
+ if (start < 0 || start >= CountChars() || count <= 0 || !m_bIsParsed ||
+ m_CharList.empty() || m_TextBuf.GetLength() == 0) {
return L"";
-
- if (start < 0)
- start = 0;
-
- if (nCount == -1) {
- nCount = pdfium::CollectionSize<int>(m_CharList) - start;
- WideStringView wsTextBuf = m_TextBuf.AsStringView();
- return WideString(wsTextBuf.Right(wsTextBuf.GetLength() - start));
}
- if (nCount <= 0 || m_CharList.empty())
- return L"";
- if (nCount + start > pdfium::CollectionSize<int>(m_CharList) - 1)
- nCount = pdfium::CollectionSize<int>(m_CharList) - start;
- if (nCount <= 0)
+
+ int text_start = TextIndexFromCharIndex(start);
+ if (text_start < 0)
return L"";
- CheckMarkedContentObject(start, nCount);
- int startindex = 0;
- PAGECHAR_INFO charinfo = m_CharList[start];
- int startOffset = 0;
- while (charinfo.m_Index == -1) {
- startOffset++;
- if (startOffset > nCount ||
- start + startOffset >= pdfium::CollectionSize<int>(m_CharList)) {
- return L"";
- }
- charinfo = m_CharList[start + startOffset];
- }
- startindex = charinfo.m_Index;
- charinfo = m_CharList[start + nCount - 1];
- int nCountOffset = 0;
- while (charinfo.m_Index == -1) {
- nCountOffset++;
- if (nCountOffset >= nCount)
- return L"";
- charinfo = m_CharList[start + nCount - nCountOffset - 1];
- }
- nCount = start + nCount - nCountOffset - startindex;
- if (nCount <= 0)
+
+ count = std::min(count, CountChars() - start);
+
+ int last = start + count - 1;
+ int text_last = TextIndexFromCharIndex(last);
+ if (text_last < 0 || text_last < text_start)
return L"";
- return WideString(m_TextBuf.AsStringView().Mid(startindex, nCount));
+
+ int text_count = text_last - text_start + 1;
+
+ return WideString(m_TextBuf.AsStringView().Mid(
+ static_cast<size_t>(text_start), static_cast<size_t>(text_count)));
}
int CPDF_TextPage::CountRects(int start, int nCount) {
diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h
index e8ab82ac2c..cd30ace3ad 100644
--- a/core/fpdftext/cpdf_textpage.h
+++ b/core/fpdftext/cpdf_textpage.h
@@ -103,7 +103,13 @@ class CPDF_TextPage {
std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const;
int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const;
WideString GetTextByRect(const CFX_FloatRect& rect) const;
- WideString GetPageText(int start = 0, int nCount = -1) const;
+
+ // Returns a string with the text from |m_TextBuf| that is covered by the
+ // input range. |start| and |count| are in terms of |m_CharIndex|, so the
+ // range is converted into the corresponding |m_TextBuf| indices.
+ WideString GetPageText(int start, int count) const;
+ WideString GetAllPageText() const { return GetPageText(0, CountChars()); }
+
int CountRects(int start, int nCount);
void GetRect(int rectIndex,
float& left,
diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp
index a874521326..9f243a0aee 100644
--- a/core/fpdftext/cpdf_textpagefind.cpp
+++ b/core/fpdftext/cpdf_textpagefind.cpp
@@ -41,7 +41,7 @@ CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage)
m_resStart(0),
m_resEnd(-1),
m_IsFind(false) {
- m_strText = m_pTextPage->GetPageText();
+ m_strText = m_pTextPage->GetAllPageText();
int nCount = pTextPage->CountChars();
if (nCount)
m_CharIndex.push_back(0);
@@ -85,7 +85,7 @@ bool CPDF_TextPageFind::FindFirst(const WideString& findwhat,
if (!m_pTextPage)
return false;
if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE))
- m_strText = m_pTextPage->GetPageText();
+ m_strText = m_pTextPage->GetAllPageText();
WideString findwhatStr = findwhat;
m_findWhat = findwhatStr;
m_flags = flags;