From 3bee9c60f013b8b7e99c39ee35699d132b330334 Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Tue, 7 Aug 2018 21:45:34 +0000 Subject: Add FPDFTextObj_GetText() API Generalize CPDF_TextPage::GetTextByRect(), so that it's possible to get the text from a text page using a predicate; that way we can easily get the text that belongs to a single text object as well. Change-Id: Ia457af0f41184694dc1481709be72b35685bce7f Reviewed-on: https://pdfium-review.googlesource.com/39530 Reviewed-by: Henrique Nakashima Reviewed-by: Lei Zhang Commit-Queue: Lei Zhang --- core/fpdftext/cpdf_textpage.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'core/fpdftext/cpdf_textpage.cpp') diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index 2894160437..ed7f36fb6c 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp @@ -426,7 +426,8 @@ int CPDF_TextPage::GetIndexAtPos(const CFX_PointF& point, return pos < nCount ? pos : NearPos; } -WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { +WideString CPDF_TextPage::GetTextByPredicate( + const std::function<bool(const PAGECHAR_INFO&)>& predicate) const { if (!m_bIsParsed) return WideString(); @@ -435,7 +436,7 @@ WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { bool IsAddLineFeed = false; WideString strText; for (const auto& charinfo : m_CharList) { - if (IsRectIntersect(rect, charinfo.m_CharBox)) { + if (predicate(charinfo)) { if (fabs(posy - charinfo.m_Origin.y) > 0 && !IsContainPreChar && IsAddLineFeed) { posy = charinfo.m_Origin.y; @@ -460,6 +461,19 @@ WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { return strText; } +WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { + return GetTextByPredicate([&rect](const PAGECHAR_INFO& charinfo) { + return IsRectIntersect(rect, charinfo.m_CharBox); + }); +} + +WideString CPDF_TextPage::GetTextByObject( + const CPDF_TextObject* pTextObj) 
const { + return GetTextByPredicate([pTextObj](const PAGECHAR_INFO& charinfo) { + return charinfo.m_pTextObj == pTextObj; + }); +} + void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { if (!m_bIsParsed || !pdfium::IndexInBounds(m_CharList, index)) return; -- cgit v1.2.3