diff options
author | Miklos Vajna <vmiklos@collabora.co.uk> | 2018-08-07 21:45:34 +0000 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2018-08-07 21:45:34 +0000 |
commit | 3bee9c60f013b8b7e99c39ee35699d132b330334 (patch) | |
tree | fc00b16bf5c6b84af3c4683e43a2652b80db173b /core | |
parent | e1c2f6d7fe7a50280161832799550a3ee8f98088 (diff) | |
download | pdfium-3bee9c60f013b8b7e99c39ee35699d132b330334.tar.xz |
Add FPDFTextObj_GetText() API
Generalize CPDF_TextPage::GetTextByRect() so that it's possible to get
the text from a text page using a predicate. That way we can easily
get the text that belongs to a single text object as well.
Change-Id: Ia457af0f41184694dc1481709be72b35685bce7f
Reviewed-on: https://pdfium-review.googlesource.com/39530
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Diffstat (limited to 'core')
-rw-r--r-- | core/fpdftext/cpdf_textpage.cpp | 18 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpage.h | 4 |
2 files changed, 20 insertions, 2 deletions
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index 2894160437..ed7f36fb6c 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp @@ -426,7 +426,8 @@ int CPDF_TextPage::GetIndexAtPos(const CFX_PointF& point, return pos < nCount ? pos : NearPos; } -WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { +WideString CPDF_TextPage::GetTextByPredicate( + const std::function<bool(const PAGECHAR_INFO&)>& predicate) const { if (!m_bIsParsed) return WideString(); @@ -435,7 +436,7 @@ WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { bool IsAddLineFeed = false; WideString strText; for (const auto& charinfo : m_CharList) { - if (IsRectIntersect(rect, charinfo.m_CharBox)) { + if (predicate(charinfo)) { if (fabs(posy - charinfo.m_Origin.y) > 0 && !IsContainPreChar && IsAddLineFeed) { posy = charinfo.m_Origin.y; @@ -460,6 +461,19 @@ WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { return strText; } +WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { + return GetTextByPredicate([&rect](const PAGECHAR_INFO& charinfo) { + return IsRectIntersect(rect, charinfo.m_CharBox); + }); +} + +WideString CPDF_TextPage::GetTextByObject( + const CPDF_TextObject* pTextObj) const { + return GetTextByPredicate([pTextObj](const PAGECHAR_INFO& charinfo) { + return charinfo.m_pTextObj == pTextObj; + }); +} + void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const { if (!m_bIsParsed || !pdfium::IndexInBounds(m_CharList, index)) return; diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h index 36d01854f5..90b45bd96b 100644 --- a/core/fpdftext/cpdf_textpage.h +++ b/core/fpdftext/cpdf_textpage.h @@ -8,6 +8,7 @@ #define CORE_FPDFTEXT_CPDF_TEXTPAGE_H_ #include <deque> +#include <functional> #include <vector> #include "core/fpdfapi/page/cpdf_pageobjectlist.h" @@ -97,6 +98,7 @@ class CPDF_TextPage { 
std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const; int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const; WideString GetTextByRect(const CFX_FloatRect& rect) const; + WideString GetTextByObject(const CPDF_TextObject* pTextObj) const; // Returns string with the text from |m_TextBuf| that are covered by the input // range. |start| and |count| are in terms of the |m_CharIndex|, so the range @@ -151,6 +153,8 @@ class CPDF_TextPage { TextOrientation FindTextlineFlowOrientation() const; void AppendGeneratedCharacter(wchar_t unicode, const CFX_Matrix& formMatrix); void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend); + WideString GetTextByPredicate( + const std::function<bool(const PAGECHAR_INFO&)>& predicate) const; UnownedPtr<const CPDF_Page> const m_pPage; std::vector<uint16_t> m_CharIndex; |