summaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
author: Miklos Vajna <vmiklos@collabora.co.uk> 2018-08-07 21:45:34 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-08-07 21:45:34 +0000
commit: 3bee9c60f013b8b7e99c39ee35699d132b330334 (patch)
tree: fc00b16bf5c6b84af3c4683e43a2652b80db173b /core
parent: e1c2f6d7fe7a50280161832799550a3ee8f98088 (diff)
download: pdfium-3bee9c60f013b8b7e99c39ee35699d132b330334.tar.xz
Add FPDFTextObj_GetText() API
Generalize CPDF_TextPage::GetTextByRect() so that it's possible to get the text from a text page using a predicate; that way we can easily get the text that belongs to a single text object as well. Change-Id: Ia457af0f41184694dc1481709be72b35685bce7f Reviewed-on: https://pdfium-review.googlesource.com/39530 Reviewed-by: Henrique Nakashima <hnakashima@chromium.org> Reviewed-by: Lei Zhang <thestig@chromium.org> Commit-Queue: Lei Zhang <thestig@chromium.org>
Diffstat (limited to 'core')
-rw-r--r-- core/fpdftext/cpdf_textpage.cpp 18
-rw-r--r-- core/fpdftext/cpdf_textpage.h 4
2 files changed, 20 insertions, 2 deletions
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index 2894160437..ed7f36fb6c 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -426,7 +426,8 @@ int CPDF_TextPage::GetIndexAtPos(const CFX_PointF& point,
return pos < nCount ? pos : NearPos;
}
-WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
+WideString CPDF_TextPage::GetTextByPredicate(
+ const std::function<bool(const PAGECHAR_INFO&)>& predicate) const {
if (!m_bIsParsed)
return WideString();
@@ -435,7 +436,7 @@ WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
bool IsAddLineFeed = false;
WideString strText;
for (const auto& charinfo : m_CharList) {
- if (IsRectIntersect(rect, charinfo.m_CharBox)) {
+ if (predicate(charinfo)) {
if (fabs(posy - charinfo.m_Origin.y) > 0 && !IsContainPreChar &&
IsAddLineFeed) {
posy = charinfo.m_Origin.y;
@@ -460,6 +461,19 @@ WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
return strText;
}
+WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {  // Returns whatever GetTextByPredicate() yields for a rectangle-intersection predicate.
+ return GetTextByPredicate([&rect](const PAGECHAR_INFO& charinfo) {  // |rect| is captured by reference by the lambda.
+ return IsRectIntersect(rect, charinfo.m_CharBox);  // A char is selected when IsRectIntersect() accepts |rect| and the char's m_CharBox.
+ });
+}
+
+WideString CPDF_TextPage::GetTextByObject(
+ const CPDF_TextObject* pTextObj) const {  // Returns whatever GetTextByPredicate() yields for chars attributed to |pTextObj|.
+ return GetTextByPredicate([pTextObj](const PAGECHAR_INFO& charinfo) {  // |pTextObj| is captured by value (a pointer copy).
+ return charinfo.m_pTextObj == pTextObj;  // A char is selected only when its m_pTextObj compares equal to |pTextObj|.
+ });  // NOTE(review): a null |pTextObj| would select any char whose m_pTextObj compares equal to null -- confirm callers reject null first.
+}
+
void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO* info) const {
if (!m_bIsParsed || !pdfium::IndexInBounds(m_CharList, index))
return;
diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h
index 36d01854f5..90b45bd96b 100644
--- a/core/fpdftext/cpdf_textpage.h
+++ b/core/fpdftext/cpdf_textpage.h
@@ -8,6 +8,7 @@
#define CORE_FPDFTEXT_CPDF_TEXTPAGE_H_
#include <deque>
+#include <functional>
#include <vector>
#include "core/fpdfapi/page/cpdf_pageobjectlist.h"
@@ -97,6 +98,7 @@ class CPDF_TextPage {
std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const;
int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const;
WideString GetTextByRect(const CFX_FloatRect& rect) const;
+ WideString GetTextByObject(const CPDF_TextObject* pTextObj) const;
// Returns string with the text from |m_TextBuf| that are covered by the input
// range. |start| and |count| are in terms of the |m_CharIndex|, so the range
@@ -151,6 +153,8 @@ class CPDF_TextPage {
TextOrientation FindTextlineFlowOrientation() const;
void AppendGeneratedCharacter(wchar_t unicode, const CFX_Matrix& formMatrix);
void SwapTempTextBuf(int32_t iCharListStartAppend, int32_t iBufStartAppend);
+ WideString GetTextByPredicate(
+ const std::function<bool(const PAGECHAR_INFO&)>& predicate) const;
UnownedPtr<const CPDF_Page> const m_pPage;
std::vector<uint16_t> m_CharIndex;