summaryrefslogtreecommitdiff
path: root/core/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext')
-rw-r--r--core/fpdftext/fpdf_text_int.cpp184
-rw-r--r--core/fpdftext/fpdf_text_int_unittest.cpp3
-rw-r--r--core/fpdftext/include/cpdf_linkextract.h40
-rw-r--r--core/fpdftext/include/cpdf_textpage.h6
4 files changed, 65 insertions, 168 deletions
diff --git a/core/fpdftext/fpdf_text_int.cpp b/core/fpdftext/fpdf_text_int.cpp
index 741331fb77..4db4d5c09f 100644
--- a/core/fpdftext/fpdf_text_int.cpp
+++ b/core/fpdftext/fpdf_text_int.cpp
@@ -262,7 +262,7 @@ int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const {
void CPDF_TextPage::GetRectArray(int start,
int nCount,
- CFX_RectArray& rectArray) const {
+ CFX_RectArray* rectArray) const {
if (start < 0 || nCount == 0) {
return;
}
@@ -290,7 +290,7 @@ void CPDF_TextPage::GetRectArray(int start,
pCurObj = info_curchar.m_pTextObj;
}
if (pCurObj != info_curchar.m_pTextObj) {
- rectArray.Add(rect);
+ rectArray->Add(rect);
pCurObj = info_curchar.m_pTextObj;
flagNewRect = TRUE;
}
@@ -343,7 +343,7 @@ void CPDF_TextPage::GetRectArray(int start,
}
}
}
- rectArray.Add(rect);
+ rectArray->Add(rect);
}
int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point,
@@ -592,7 +592,7 @@ int CPDF_TextPage::CountRects(int start, int nCount) {
nCount = pdfium::CollectionSize<int>(m_CharList) - start;
}
m_SelRects.RemoveAll();
- GetRectArray(start, nCount, m_SelRects);
+ GetRectArray(start, nCount, &m_SelRects);
return m_SelRects.GetSize();
}
@@ -649,35 +649,6 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) {
return TRUE;
}
-FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect,
- int& Rotate) {
- int start, end, count,
- n = CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom,
- TRUE);
- if (n < 1) {
- return FALSE;
- }
- if (n > 1) {
- GetBoundedSegment(n - 1, start, count);
- end = start + count - 1;
- GetBoundedSegment(0, start, count);
- } else {
- GetBoundedSegment(0, start, count);
- end = start + count - 1;
- }
- return GetBaselineRotate(start, end, Rotate);
-}
-FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
- if (!m_bIsParsed)
- return FALSE;
-
- if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
- return FALSE;
-
- CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
- return GetBaselineRotate(rect, Rotate);
-}
-
int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
FX_FLOAT top,
FX_FLOAT right,
@@ -756,14 +727,6 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
return m_Segments.GetSize();
}
-void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
- if (index < 0 || index >= m_Segments.GetSize()) {
- return;
- }
- start = m_Segments.GetAt(index).m_Start;
- count = m_Segments.GetAt(index).m_nCount;
-}
-
int CPDF_TextPage::GetWordBreak(int index, int direction) const {
if (!m_bIsParsed)
return -1;
@@ -2126,7 +2089,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() {
m_IsFind = TRUE;
int resStart = GetCharIndex(m_resStart);
int resEnd = GetCharIndex(m_resEnd);
- m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray);
+ m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, &m_resArray);
if (m_flags & FPDFTEXT_CONSECUTIVE) {
m_findNextStart = m_resStart + 1;
m_findPreStart = m_resEnd - 1;
@@ -2172,7 +2135,7 @@ FX_BOOL CPDF_TextPageFind::FindPrev() {
m_resStart = m_pTextPage->TextIndexFromCharIndex(order);
m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1);
m_IsFind = TRUE;
- m_pTextPage->GetRectArray(order, MatchedCount, m_resArray);
+ m_pTextPage->GetRectArray(order, MatchedCount, &m_resArray);
if (m_flags & FPDFTEXT_CONSECUTIVE) {
m_findNextStart = m_resStart + 1;
m_findPreStart = m_resEnd - 1;
@@ -2330,43 +2293,22 @@ int CPDF_TextPageFind::GetMatchedCount() const {
return resEnd - resStart + 1;
}
-CPDF_LinkExtract::CPDF_LinkExtract()
- : m_pTextPage(nullptr), m_bIsParsed(false) {}
+CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage)
+ : m_pTextPage(pTextPage) {}
CPDF_LinkExtract::~CPDF_LinkExtract() {
- DeleteLinkList();
}
-FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) {
- if (!pTextPage || !pTextPage->IsParsed())
- return FALSE;
+void CPDF_LinkExtract::ExtractLinks() {
+ m_LinkArray.clear();
+ if (!m_pTextPage->IsParsed())
+ return;
- m_pTextPage = (const CPDF_TextPage*)pTextPage;
m_strPageText = m_pTextPage->GetPageText(0, -1);
- DeleteLinkList();
- if (m_strPageText.IsEmpty()) {
- return FALSE;
- }
- ParseLink();
- m_bIsParsed = true;
- return TRUE;
-}
-
-void CPDF_LinkExtract::DeleteLinkList() {
- while (m_LinkList.GetSize()) {
- CPDF_LinkExt* linkinfo = NULL;
- linkinfo = m_LinkList.GetAt(0);
- m_LinkList.RemoveAt(0);
- delete linkinfo;
- }
- m_LinkList.RemoveAll();
-}
+ if (m_strPageText.IsEmpty())
+ return;
-int CPDF_LinkExtract::CountLinks() const {
- if (!m_bIsParsed) {
- return -1;
- }
- return m_LinkList.GetSize();
+ ParseLink();
}
void CPDF_LinkExtract::ParseLink() {
@@ -2395,7 +2337,7 @@ void CPDF_LinkExtract::ParseLink() {
}
if (nCount > 5 &&
(CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) {
- AppendToLinkList(start, nCount, strBeCheck);
+ m_LinkArray.push_back({start, nCount, strBeCheck});
}
}
start = ++pos;
@@ -2405,47 +2347,46 @@ void CPDF_LinkExtract::ParseLink() {
}
}
-FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
+bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
CFX_WideString str = strBeCheck;
str.MakeLower();
if (str.Find(L"http://www.") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www."));
- return TRUE;
+ return true;
}
if (str.Find(L"http://") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://"));
- return TRUE;
+ return true;
}
if (str.Find(L"https://www.") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www."));
- return TRUE;
+ return true;
}
if (str.Find(L"https://") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://"));
- return TRUE;
+ return true;
}
if (str.Find(L"www.") != -1) {
strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www."));
strBeCheck = L"http://" + strBeCheck;
- return TRUE;
+ return true;
}
- return FALSE;
+ return false;
}
bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
int aPos = str.Find(L'@');
// Invalid when no '@'.
- if (aPos < 1) {
- return FALSE;
- }
+ if (aPos < 1)
+ return false;
// Check the local part.
int pPos = aPos; // Used to track the position of '@' or '.'.
for (int i = aPos - 1; i >= 0; i--) {
FX_WCHAR ch = str.GetAt(i);
- if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) {
+ if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch))
continue;
- }
+
if (ch != L'.' || i == pPos - 1 || i == 0) {
if (i == aPos - 1) {
// There is '.' or invalid char before '@'.
@@ -2463,25 +2404,25 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
// Check the domain name part.
aPos = str.Find(L'@');
- if (aPos < 1) {
- return FALSE;
- }
+ if (aPos < 1)
+ return false;
+
str.TrimRight(L'.');
// At least one '.' in domain name, but not at the beginning.
// TODO(weili): RFC5322 allows domain names to be a local name without '.'.
// Check whether we should remove this check.
int ePos = str.Find(L'.', aPos + 1);
- if (ePos == -1 || ePos == aPos + 1) {
- return FALSE;
- }
+ if (ePos == -1 || ePos == aPos + 1)
+ return false;
+
// Validate all other chars in domain name.
int nLen = str.GetLength();
pPos = 0; // Used to track the position of '.'.
for (int i = aPos + 1; i < nLen; i++) {
FX_WCHAR wch = str.GetAt(i);
- if (wch == L'-' || FXSYS_iswalnum(wch)) {
+ if (wch == L'-' || FXSYS_iswalnum(wch))
continue;
- }
+
if (wch != L'.' || i == pPos + 1) {
// Domain name should end before invalid char.
int host_end = i == pPos + 1 ? i - 2 : i - 1;
@@ -2490,61 +2431,24 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
str = str.Left(host_end + 1);
break;
}
- return FALSE;
+ return false;
}
pPos = i;
}
- if (str.Find(L"mailto:") == -1) {
+ if (str.Find(L"mailto:") == -1)
str = L"mailto:" + str;
- }
- return TRUE;
-}
-void CPDF_LinkExtract::AppendToLinkList(int start,
- int count,
- const CFX_WideString& strUrl) {
- CPDF_LinkExt* linkInfo = new CPDF_LinkExt;
- linkInfo->m_strUrl = strUrl;
- linkInfo->m_Start = start;
- linkInfo->m_Count = count;
- m_LinkList.Add(linkInfo);
+ return true;
}
-CFX_WideString CPDF_LinkExtract::GetURL(int index) const {
- if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
- return L"";
- }
- CPDF_LinkExt* link = NULL;
- link = m_LinkList.GetAt(index);
- if (!link) {
- return L"";
- }
- return link->m_strUrl;
-}
-void CPDF_LinkExtract::GetBoundedSegment(int index,
- int& start,
- int& count) const {
- if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
- return;
- }
- CPDF_LinkExt* link = NULL;
- link = m_LinkList.GetAt(index);
- if (!link) {
- return;
- }
- start = link->m_Start;
- count = link->m_Count;
+CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const {
+ return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L"";
}
-void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const {
- if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
- return;
- }
- CPDF_LinkExt* link = NULL;
- link = m_LinkList.GetAt(index);
- if (!link) {
- return;
+void CPDF_LinkExtract::GetRects(size_t index, CFX_RectArray* pRects) const {
+ if (index < m_LinkArray.size()) {
+ m_pTextPage->GetRectArray(m_LinkArray[index].m_Start,
+ m_LinkArray[index].m_Count, pRects);
}
- m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects);
}
diff --git a/core/fpdftext/fpdf_text_int_unittest.cpp b/core/fpdftext/fpdf_text_int_unittest.cpp
index e1dd0f7504..0ee11d2d57 100644
--- a/core/fpdftext/fpdf_text_int_unittest.cpp
+++ b/core/fpdftext/fpdf_text_int_unittest.cpp
@@ -8,6 +8,9 @@
// Class to help test functions in CPDF_LinkExtract class.
class CPDF_TestLinkExtract : public CPDF_LinkExtract {
+ public:
+ CPDF_TestLinkExtract() : CPDF_LinkExtract(nullptr) {}
+
private:
// Add test cases as friends to access protected member functions.
// Access CheckMailLink.
diff --git a/core/fpdftext/include/cpdf_linkextract.h b/core/fpdftext/include/cpdf_linkextract.h
index 263768ee5d..4f9537c799 100644
--- a/core/fpdftext/include/cpdf_linkextract.h
+++ b/core/fpdftext/include/cpdf_linkextract.h
@@ -7,6 +7,8 @@
#ifndef CORE_FPDFTEXT_INCLUDE_CPDF_LINKEXTRACT_H_
#define CORE_FPDFTEXT_INCLUDE_CPDF_LINKEXTRACT_H_
+#include <vector>
+
#include "core/fxcrt/include/fx_basic.h"
#include "core/fxcrt/include/fx_coordinates.h"
#include "core/fxcrt/include/fx_string.h"
@@ -14,41 +16,31 @@
class CPDF_TextPage;
-class CPDF_LinkExt {
- public:
- CPDF_LinkExt() {}
- ~CPDF_LinkExt() {}
-
- int m_Start;
- int m_Count;
- CFX_WideString m_strUrl;
-};
-
class CPDF_LinkExtract {
public:
- CPDF_LinkExtract();
+ explicit CPDF_LinkExtract(const CPDF_TextPage* pTextPage);
~CPDF_LinkExtract();
- FX_BOOL ExtractLinks(const CPDF_TextPage* pTextPage);
- int CountLinks() const;
- CFX_WideString GetURL(int index) const;
- void GetBoundedSegment(int index, int& start, int& count) const;
- void GetRects(int index, CFX_RectArray& rects) const;
-
- FX_BOOL IsExtract() const { return m_bIsParsed; }
+ void ExtractLinks();
+ size_t CountLinks() const { return m_LinkArray.size(); }
+ CFX_WideString GetURL(size_t index) const;
+ void GetRects(size_t index, CFX_RectArray* pRects) const;
protected:
void ParseLink();
- void DeleteLinkList();
- FX_BOOL CheckWebLink(CFX_WideString& strBeCheck);
+ bool CheckWebLink(CFX_WideString& str);
bool CheckMailLink(CFX_WideString& str);
- void AppendToLinkList(int start, int count, const CFX_WideString& strUrl);
private:
- CFX_ArrayTemplate<CPDF_LinkExt*> m_LinkList;
- const CPDF_TextPage* m_pTextPage;
+ struct Link {
+ int m_Start;
+ int m_Count;
+ CFX_WideString m_strUrl;
+ };
+
+ const CPDF_TextPage* const m_pTextPage;
CFX_WideString m_strPageText;
- bool m_bIsParsed;
+ std::vector<Link> m_LinkArray;
};
#endif // CORE_FPDFTEXT_INCLUDE_CPDF_LINKEXTRACT_H_
diff --git a/core/fpdftext/include/cpdf_textpage.h b/core/fpdftext/include/cpdf_textpage.h
index 19e8791b5a..8d608b0bf0 100644
--- a/core/fpdftext/include/cpdf_textpage.h
+++ b/core/fpdftext/include/cpdf_textpage.h
@@ -67,7 +67,7 @@ class CPDF_TextPage {
int TextIndexFromCharIndex(int CharIndex) const;
int CountChars() const;
void GetCharInfo(int index, FPDF_CHAR_INFO* info) const;
- void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const;
+ void GetRectArray(int start, int nCount, CFX_RectArray* rectArray) const;
int GetIndexAtPos(CFX_FloatPoint point,
FX_FLOAT xTolerance,
FX_FLOAT yTolerance) const;
@@ -85,14 +85,12 @@ class CPDF_TextPage {
FX_FLOAT& top,
FX_FLOAT& right,
FX_FLOAT& bottom) const;
- FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate);
- FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate);
int CountBoundedSegments(FX_FLOAT left,
FX_FLOAT top,
FX_FLOAT right,
FX_FLOAT bottom,
FX_BOOL bContains = FALSE);
- void GetBoundedSegment(int index, int& start, int& count) const;
+
int GetWordBreak(int index, int direction) const;
static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1,