diff options
Diffstat (limited to 'core/fpdftext')
-rw-r--r-- | core/fpdftext/cpdf_linkextract.cpp | 2 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpage.cpp | 56 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpage.h | 8 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpagefind.cpp | 4 |
4 files changed, 27 insertions, 43 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp index 3a38343721..05cbdfb3a7 100644 --- a/core/fpdftext/cpdf_linkextract.cpp +++ b/core/fpdftext/cpdf_linkextract.cpp @@ -114,7 +114,7 @@ void CPDF_LinkExtract::ExtractLinks() { if (!m_pTextPage->IsParsed()) return; - m_strPageText = m_pTextPage->GetPageText(0, -1); + m_strPageText = m_pTextPage->GetAllPageText(); if (m_strPageText.IsEmpty()) return; diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index 7ea2061c26..8ef5522bae 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp @@ -436,49 +436,27 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, } } -WideString CPDF_TextPage::GetPageText(int start, int nCount) const { - if (!m_bIsParsed || nCount == 0) +WideString CPDF_TextPage::GetPageText(int start, int count) const { + if (start < 0 || start >= CountChars() || count <= 0 || !m_bIsParsed || + m_CharList.empty() || m_TextBuf.GetLength() == 0) { return L""; - - if (start < 0) - start = 0; - - if (nCount == -1) { - nCount = pdfium::CollectionSize<int>(m_CharList) - start; - WideStringView wsTextBuf = m_TextBuf.AsStringView(); - return WideString(wsTextBuf.Right(wsTextBuf.GetLength() - start)); } - if (nCount <= 0 || m_CharList.empty()) - return L""; - if (nCount + start > pdfium::CollectionSize<int>(m_CharList) - 1) - nCount = pdfium::CollectionSize<int>(m_CharList) - start; - if (nCount <= 0) + + int text_start = TextIndexFromCharIndex(start); + if (text_start < 0) return L""; - CheckMarkedContentObject(start, nCount); - int startindex = 0; - PAGECHAR_INFO charinfo = m_CharList[start]; - int startOffset = 0; - while (charinfo.m_Index == -1) { - startOffset++; - if (startOffset > nCount || - start + startOffset >= pdfium::CollectionSize<int>(m_CharList)) { - return L""; - } - charinfo = m_CharList[start + startOffset]; - } - startindex = charinfo.m_Index; - charinfo = m_CharList[start + nCount - 1]; - int nCountOffset = 0; - while (charinfo.m_Index == -1) { - nCountOffset++; - if (nCountOffset >= nCount) - return L""; - charinfo = m_CharList[start + nCount - nCountOffset - 1]; - } - nCount = start + nCount - nCountOffset - startindex; - if (nCount <= 0) + + count = std::min(count, CountChars() - start); + + int last = start + count - 1; + int text_last = TextIndexFromCharIndex(last); + if (text_last < 0 || text_last < text_start) return L""; - return WideString(m_TextBuf.AsStringView().Mid(startindex, nCount)); + + int text_count = text_last - text_start + 1; + + return WideString(m_TextBuf.AsStringView().Mid( + static_cast<size_t>(text_start), static_cast<size_t>(text_count))); } int CPDF_TextPage::CountRects(int start, int nCount) { diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h index e8ab82ac2c..cd30ace3ad 100644 --- a/core/fpdftext/cpdf_textpage.h +++ b/core/fpdftext/cpdf_textpage.h @@ -103,7 +103,13 @@ class CPDF_TextPage { std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const; int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const; WideString GetTextByRect(const CFX_FloatRect& rect) const; - WideString GetPageText(int start = 0, int nCount = -1) const; + + // Returns string with the text from |m_TextBuf| that are covered by the input + // range. |start| and |count| are in terms of the m_CharIndex, so the range + // will be converted into appropriate indices. + WideString GetPageText(int start, int count) const; + WideString GetAllPageText() const { return GetPageText(0, CountChars()); } + int CountRects(int start, int nCount); void GetRect(int rectIndex, float& left, diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp index a874521326..9f243a0aee 100644 --- a/core/fpdftext/cpdf_textpagefind.cpp +++ b/core/fpdftext/cpdf_textpagefind.cpp @@ -41,7 +41,7 @@ CPDF_TextPageFind::CPDF_TextPageFind(const CPDF_TextPage* pTextPage) m_resStart(0), m_resEnd(-1), m_IsFind(false) { - m_strText = m_pTextPage->GetPageText(); + m_strText = m_pTextPage->GetAllPageText(); int nCount = pTextPage->CountChars(); if (nCount) m_CharIndex.push_back(0); @@ -85,7 +85,7 @@ bool CPDF_TextPageFind::FindFirst(const WideString& findwhat, if (!m_pTextPage) return false; if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) - m_strText = m_pTextPage->GetPageText(); + m_strText = m_pTextPage->GetAllPageText(); WideString findwhatStr = findwhat; m_findWhat = findwhatStr; m_flags = flags; |