summaryrefslogtreecommitdiff
path: root/core/src/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/fpdftext')
-rw-r--r--core/src/fpdftext/fpdf_text_int.cpp119
1 files changed, 77 insertions, 42 deletions
diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp
index 22591651ab..a0f0c64822 100644
--- a/core/src/fpdftext/fpdf_text_int.cpp
+++ b/core/src/fpdftext/fpdf_text_int.cpp
@@ -134,6 +134,7 @@ bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) {
return false;
}
}
+
FX_BOOL CPDF_TextPage::ParseTextPage() {
m_bIsParsed = false;
if (!m_pPage)
@@ -186,9 +187,11 @@ FX_BOOL CPDF_TextPage::ParseTextPage() {
}
return TRUE;
}
+
int CPDF_TextPage::CountChars() const {
return pdfium::CollectionSize<int>(m_CharList);
}
+
int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
int count = 0;
@@ -199,6 +202,7 @@ int CPDF_TextPage::CharIndexFromTextIndex(int TextIndex) const {
}
return -1;
}
+
int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const {
int indexSize = pdfium::CollectionSize<int>(m_CharIndex);
int count = 0;
@@ -213,6 +217,7 @@ int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const {
}
return -1;
}
+
void CPDF_TextPage::GetRectArray(int start,
int nCount,
CFX_RectArray& rectArray) const {
@@ -298,6 +303,7 @@ void CPDF_TextPage::GetRectArray(int start,
}
rectArray.Add(rect);
}
+
int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
FX_FLOAT xTolerance,
FX_FLOAT yTolerance) const {
@@ -378,6 +384,7 @@ CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
}
return strText;
}
+
void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
CFX_RectArray& resRectArray) const {
if (!m_bIsParsed)
@@ -415,6 +422,7 @@ void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
}
resRectArray.Add(curRect);
}
+
int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x,
FX_FLOAT y,
FX_FLOAT xTolerance,
@@ -483,6 +491,7 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start,
nCount = endIndex - start + 1;
}
}
+
CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
if (!m_bIsParsed || nCount == 0)
return L"";
@@ -532,6 +541,7 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
}
return m_TextBuf.GetWideString().Mid(startindex, nCount);
}
+
int CPDF_TextPage::CountRects(int start, int nCount) {
if (!m_bIsParsed || start < 0)
return -1;
@@ -544,6 +554,7 @@ int CPDF_TextPage::CountRects(int start, int nCount) {
GetRectArray(start, nCount, m_SelRects);
return m_SelRects.GetSize();
}
+
void CPDF_TextPage::GetRect(int rectIndex,
FX_FLOAT& left,
FX_FLOAT& top,
@@ -625,6 +636,7 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
return GetBaselineRotate(rect, Rotate);
}
+
int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
FX_FLOAT top,
FX_FLOAT right,
@@ -702,6 +714,7 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
}
return m_Segments.GetSize();
}
+
void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
if (index < 0 || index >= m_Segments.GetSize()) {
return;
@@ -709,6 +722,7 @@ void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
start = m_Segments.GetAt(index).m_Start;
count = m_Segments.GetAt(index).m_nCount;
}
+
int CPDF_TextPage::GetWordBreak(int index, int direction) const {
if (!m_bIsParsed)
return -1;
@@ -740,6 +754,7 @@ int CPDF_TextPage::GetWordBreak(int index, int direction) const {
}
return breakPos;
}
+
int32_t CPDF_TextPage::FindTextlineFlowDirection() {
if (!m_pPage) {
return -1;
@@ -837,6 +852,7 @@ int32_t CPDF_TextPage::FindTextlineFlowDirection() {
}
return -1;
}
+
void CPDF_TextPage::ProcessObject() {
CPDF_PageObject* pPageObj = NULL;
if (!m_pPage) {
@@ -870,6 +886,7 @@ void CPDF_TextPage::ProcessObject() {
m_LineObj.RemoveAll();
CloseTempLine();
}
+
void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj,
const CFX_Matrix& formMatrix) {
CPDF_PageObject* pPageObj = NULL;
@@ -896,6 +913,7 @@ void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj,
pPageObj = NULL;
}
}
+
int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const {
if (charCode == -1) {
return 0;
@@ -913,6 +931,7 @@ int CPDF_TextPage::GetCharWidth(FX_DWORD charCode, CPDF_Font* pFont) const {
}
return w;
}
+
void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) {
CFX_BidiChar::Segment seg = pBidi->GetSegmentInfo();
if (seg.direction == CFX_BidiChar::RIGHT) {
@@ -927,6 +946,7 @@ void CPDF_TextPage::OnPiece(CFX_BidiChar* pBidi, CFX_WideString& str) {
}
}
}
+
void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
PAGECHAR_INFO info) {
if (!IsControlChar(info)) {
@@ -954,6 +974,7 @@ void CPDF_TextPage::AddCharInfoByLRDirection(FX_WCHAR wChar,
}
m_CharList.push_back(info);
}
+
void CPDF_TextPage::AddCharInfoByRLDirection(FX_WCHAR wChar,
PAGECHAR_INFO info) {
if (!IsControlChar(info)) {
@@ -1106,21 +1127,21 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
CPDF_TextObject* pTextObj = Obj.m_pTextObj;
CPDF_ContentMarkData* pMarkData =
(CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
- if (!pMarkData) {
+ if (!pMarkData)
return FPDFTEXT_MC_PASS;
- }
+
int nContentMark = pMarkData->CountItems();
- if (nContentMark < 1) {
+ if (nContentMark < 1)
return FPDFTEXT_MC_PASS;
- }
CFX_WideString actText;
FX_BOOL bExist = FALSE;
CPDF_Dictionary* pDict = NULL;
int n = 0;
for (n = 0; n < nContentMark; n++) {
CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
- CFX_ByteString tagStr = (CFX_ByteString)item.GetName();
- pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam()));
+ if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None)
+ continue;
+ pDict = item.GetParam();
CPDF_String* temp =
ToString(pDict ? pDict->GetElement("ActualText") : nullptr);
if (temp) {
@@ -1128,38 +1149,32 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
actText = temp->GetUnicodeText();
}
}
- if (!bExist) {
+ if (!bExist)
return FPDFTEXT_MC_PASS;
- }
+
if (m_pPreTextObj) {
- if (CPDF_ContentMarkData* pPreMarkData =
- (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject()) {
- if (pPreMarkData->CountItems() == n) {
- CPDF_ContentMarkItem& item = pPreMarkData->GetItem(n - 1);
- if (pDict == item.GetParam()) {
- return FPDFTEXT_MC_DONE;
- }
- }
+ CPDF_ContentMarkData* pPreMarkData =
+ (CPDF_ContentMarkData*)m_pPreTextObj->m_ContentMark.GetObject();
+ if (pPreMarkData && pPreMarkData->CountItems() == n &&
+ pDict == pPreMarkData->GetItem(n - 1).GetParam()) {
+ return FPDFTEXT_MC_DONE;
}
}
- CPDF_Font* pFont = pTextObj->GetFont();
FX_STRSIZE nItems = actText.GetLength();
- if (nItems < 1) {
+ if (nItems < 1)
return FPDFTEXT_MC_PASS;
- }
+
+ CPDF_Font* pFont = pTextObj->GetFont();
bExist = FALSE;
for (FX_STRSIZE i = 0; i < nItems; i++) {
- FX_WCHAR wChar = actText.GetAt(i);
- if (-1 == pFont->CharCodeFromUnicode(wChar)) {
- continue;
- } else {
+ if (pFont->CharCodeFromUnicode(actText.GetAt(i)) != -1) {
bExist = TRUE;
break;
}
}
- if (!bExist) {
+ if (!bExist)
return FPDFTEXT_MC_PASS;
- }
+
bExist = FALSE;
for (FX_STRSIZE i = 0; i < nItems; i++) {
FX_WCHAR wChar = actText.GetAt(i);
@@ -1168,39 +1183,36 @@ int32_t CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
break;
}
}
- if (!bExist) {
+ if (!bExist)
return FPDFTEXT_MC_DONE;
- }
+
return FPDFTEXT_MC_DELAY;
}
+
void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
CPDF_TextObject* pTextObj = Obj.m_pTextObj;
CPDF_ContentMarkData* pMarkData =
(CPDF_ContentMarkData*)pTextObj->m_ContentMark.GetObject();
- if (!pMarkData) {
+ if (!pMarkData)
return;
- }
+
int nContentMark = pMarkData->CountItems();
- if (nContentMark < 1) {
+ if (nContentMark < 1)
return;
- }
CFX_WideString actText;
CPDF_Dictionary* pDict = NULL;
- int n = 0;
- for (n = 0; n < nContentMark; n++) {
+ for (int n = 0; n < nContentMark; n++) {
CPDF_ContentMarkItem& item = pMarkData->GetItem(n);
- CFX_ByteString tagStr = (CFX_ByteString)item.GetName();
- pDict = ToDictionary(static_cast<CPDF_Object*>(item.GetParam()));
- CPDF_String* temp =
- ToString(pDict ? pDict->GetElement("ActualText") : nullptr);
- if (temp) {
- actText = temp->GetUnicodeText();
- }
+ if (item.GetParamType() == CPDF_ContentMarkItem::ParamType::None)
+ continue;
+ pDict = item.GetParam();
+ if (pDict)
+ actText = pDict->GetUnicodeTextBy("ActualText");
}
FX_STRSIZE nItems = actText.GetLength();
- if (nItems < 1) {
+ if (nItems < 1)
return;
- }
+
CPDF_Font* pFont = pTextObj->GetFont();
CFX_Matrix formMatrix = Obj.m_formMatrix;
CFX_Matrix matrix;
@@ -1239,6 +1251,7 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
m_TempCharList.push_back(charinfo);
}
}
+
void CPDF_TextPage::FindPreviousTextObject() {
if (m_TempCharList.empty() && m_CharList.empty())
return;
@@ -1549,6 +1562,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
SwapTempTextBuf(iCharListStartAppend, iBufStartAppend);
}
}
+
int32_t CPDF_TextPage::GetTextObjectWritingMode(
const CPDF_TextObject* pTextObj) {
int32_t nChars = pTextObj->CountChars();
@@ -1614,6 +1628,7 @@ FX_BOOL CPDF_TextPage::IsHyphen(FX_WCHAR curChar) {
}
return FALSE;
}
+
int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
const CFX_Matrix& formMatrix) {
FindPreviousTextObject();
@@ -1770,6 +1785,7 @@ int CPDF_TextPage::ProcessInsertObject(const CPDF_TextObject* pObj,
}
return 0;
}
+
FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
CPDF_TextObject* pTextObj2) {
if (!pTextObj1 || !pTextObj2) {
@@ -1827,6 +1843,7 @@ FX_BOOL CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
}
return TRUE;
}
+
FX_BOOL CPDF_TextPage::IsSameAsPreTextObject(CPDF_TextObject* pTextObj,
FX_POSITION ObjPos) {
if (!pTextObj) {
@@ -1890,6 +1907,7 @@ FX_BOOL CPDF_TextPage::IsRectIntersect(const CFX_FloatRect& rect1,
rect.Intersect(rect2);
return !rect.IsEmpty();
}
+
FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) {
if (unicode < L'A') {
return FALSE;
@@ -1902,6 +1920,7 @@ FX_BOOL CPDF_TextPage::IsLetter(FX_WCHAR unicode) {
}
return TRUE;
}
+
CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage)
: m_pTextPage(pTextPage),
m_flags(0),
@@ -1946,9 +1965,11 @@ CPDF_TextPageFind::CPDF_TextPageFind(const IPDF_TextPage* pTextPage)
m_CharIndex.erase(m_CharIndex.begin() + indexSize - 1);
}
}
+
int CPDF_TextPageFind::GetCharIndex(int index) const {
return m_pTextPage->CharIndexFromTextIndex(index);
}
+
FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat,
int flags,
int startPos) {
@@ -1999,6 +2020,7 @@ FX_BOOL CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat,
m_resEnd = -1;
return TRUE;
}
+
FX_BOOL CPDF_TextPageFind::FindNext() {
if (!m_pTextPage) {
return FALSE;
@@ -2106,6 +2128,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() {
}
return m_IsFind;
}
+
FX_BOOL CPDF_TextPageFind::FindPrev() {
if (!m_pTextPage) {
return FALSE;
@@ -2151,6 +2174,7 @@ FX_BOOL CPDF_TextPageFind::FindPrev() {
}
return m_IsFind;
}
+
void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
if (findwhat.IsEmpty()) {
return;
@@ -2199,6 +2223,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
index++;
}
}
+
FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText,
int startPos,
int endPos) {
@@ -2245,6 +2270,7 @@ FX_BOOL CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText,
}
return TRUE;
}
+
FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString,
const FX_WCHAR* lpszFullString,
int iSubString,
@@ -2272,6 +2298,7 @@ FX_BOOL CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString,
rString.ReleaseBuffer();
return TRUE;
}
+
CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) {
CFX_WideString str2;
str2.Empty();
@@ -2281,12 +2308,15 @@ CFX_WideString CPDF_TextPageFind::MakeReverse(const CFX_WideString& str) {
}
return str2;
}
+
void CPDF_TextPageFind::GetRectArray(CFX_RectArray& rects) const {
rects.Copy(m_resArray);
}
+
int CPDF_TextPageFind::GetCurOrder() const {
return GetCharIndex(m_resStart);
}
+
int CPDF_TextPageFind::GetMatchedCount() const {
int resStart = GetCharIndex(m_resStart);
int resEnd = GetCharIndex(m_resEnd);
@@ -2325,12 +2355,14 @@ void CPDF_LinkExtract::DeleteLinkList() {
}
m_LinkList.RemoveAll();
}
+
int CPDF_LinkExtract::CountLinks() const {
if (!m_bIsParsed) {
return -1;
}
return m_LinkList.GetSize();
}
+
void CPDF_LinkExtract::ParseLink() {
int start = 0, pos = 0;
int TotalChar = m_pTextPage->CountChars();
@@ -2366,6 +2398,7 @@ void CPDF_LinkExtract::ParseLink() {
}
}
}
+
FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
CFX_WideString str = strBeCheck;
str.MakeLower();
@@ -2392,6 +2425,7 @@ FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
}
return FALSE;
}
+
bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
int aPos = str.Find(L'@');
// Invalid when no '@'.
@@ -2496,6 +2530,7 @@ void CPDF_LinkExtract::GetBoundedSegment(int index,
start = link->m_Start;
count = link->m_Count;
}
+
void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const {
if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
return;