summary | refs | log | tree | commit | diff
path: root/core/src/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/fpdftext')
-rw-r--r-- core/src/fpdftext/fpdf_text_int.cpp 159
-rw-r--r-- core/src/fpdftext/text_int.h 18
2 files changed, 81 insertions, 96 deletions
diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp
index 1b476d7b1f..aa25728c15 100644
--- a/core/src/fpdftext/fpdf_text_int.cpp
+++ b/core/src/fpdftext/fpdf_text_int.cpp
@@ -96,10 +96,7 @@ IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs,
}
IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind(
const IPDF_TextPage* pTextPage) {
- if (!pTextPage) {
- return NULL;
- }
- return new CPDF_TextPageFind(pTextPage);
+ return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr;
}
IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() {
return new CPDF_LinkExtract();
@@ -112,43 +109,46 @@ IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() {
#define TEXT_RETURN_LINEFEED L"\r\n"
#define TEXT_LINEFEED L"\n"
#define TEXT_CHARRATIO_GAPDELTA 0.070
+
CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags)
- : m_charList(512),
+ : m_pPage(pPage),
+ m_charList(512),
m_TempCharList(50),
- m_pPreTextObj(NULL),
- m_IsParsered(FALSE),
+ m_parserflag(flags),
+ m_pPreTextObj(nullptr),
+ m_bIsParsed(false),
m_TextlineDir(-1),
m_CurlineRect(0, 0, 0, 0) {
- m_pPage = pPage;
- m_parserflag = flags;
m_TextBuf.EstimateSize(0, 10240);
pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
(int)pPage->GetPageHeight(), 0);
}
+
CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage,
CPDFText_ParseOptions ParserOptions)
: m_ParseOptions(ParserOptions),
+ m_pPage(pPage),
m_charList(512),
m_TempCharList(50),
- m_pPreTextObj(NULL),
- m_IsParsered(FALSE),
+ m_parserflag(0),
+ m_pPreTextObj(nullptr),
+ m_bIsParsed(false),
m_TextlineDir(-1),
m_CurlineRect(0, 0, 0, 0) {
- m_pPage = pPage;
- m_parserflag = 0;
m_TextBuf.EstimateSize(0, 10240);
pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(),
(int)pPage->GetPageHeight(), 0);
}
+
CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags)
- : m_charList(512),
+ : m_pPage(pPage),
+ m_charList(512),
m_TempCharList(50),
- m_pPreTextObj(NULL),
- m_IsParsered(FALSE),
+ m_parserflag(flags),
+ m_pPreTextObj(nullptr),
+ m_bIsParsed(false),
m_TextlineDir(-1),
m_CurlineRect(0, 0, 0, 0) {
- m_pPage = pPage;
- m_parserflag = flags;
m_TextBuf.EstimateSize(0, 10240);
CFX_FloatRect pageRect = pPage->CalcBoundingBox();
m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top);
@@ -172,16 +172,15 @@ bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) {
}
}
FX_BOOL CPDF_TextPage::ParseTextPage() {
- if (!m_pPage) {
- m_IsParsered = FALSE;
+ m_bIsParsed = false;
+ if (!m_pPage)
return FALSE;
- }
- m_IsParsered = FALSE;
+
m_TextBuf.Clear();
m_charList.RemoveAll();
m_pPreTextObj = NULL;
ProcessObject();
- m_IsParsered = TRUE;
+ m_bIsParsed = true;
if (!m_ParseOptions.m_bGetCharCodeOnly) {
m_CharIndex.RemoveAll();
int nCount = m_charList.GetSize();
@@ -269,7 +268,7 @@ void CPDF_TextPage::GetRectArray(int start,
if (start < 0 || nCount == 0) {
return;
}
- if (!m_IsParsered) {
+ if (!m_bIsParsed) {
return;
}
PAGECHAR_INFO info_curchar;
@@ -352,12 +351,9 @@ void CPDF_TextPage::GetRectArray(int start,
int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
FX_FLOAT xTolerance,
FX_FLOAT yTolerance) const {
- if (m_ParseOptions.m_bGetCharCodeOnly) {
- return -3;
- }
- if (!m_IsParsered) {
+ if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
return -3;
- }
+
int pos = 0;
int NearPos = -1;
double xdif = 5000, ydif = 5000;
@@ -400,9 +396,9 @@ int CPDF_TextPage::GetIndexAtPos(CPDF_Point point,
}
CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
CFX_WideString strText;
- if (m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) {
+ if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
return strText;
- }
+
int nCount = m_charList.GetSize();
int pos = 0;
FX_FLOAT posy = 0;
@@ -438,12 +434,9 @@ CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
}
void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect,
CFX_RectArray& resRectArray) const {
- if (m_ParseOptions.m_bGetCharCodeOnly) {
+ if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
return;
- }
- if (!m_IsParsered) {
- return;
- }
+
CFX_FloatRect curRect;
FX_BOOL flagNewRect = TRUE;
CPDF_TextObject* pCurObj = NULL;
@@ -498,15 +491,12 @@ int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x,
return GetIndexAtPos(point, xTolerance, yTolerance);
}
void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const {
- if (m_ParseOptions.m_bGetCharCodeOnly) {
+ if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
return;
- }
- if (!m_IsParsered) {
- return;
- }
- if (index < 0 || index >= m_charList.GetSize()) {
+
+ if (index < 0 || index >= m_charList.GetSize())
return;
- }
+
PAGECHAR_INFO charinfo;
charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
info.m_Charcode = charinfo.m_CharCode;
@@ -561,12 +551,12 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start,
}
}
CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
- if (!m_IsParsered || nCount == 0) {
+ if (!m_bIsParsed || nCount == 0)
return L"";
- }
- if (start < 0) {
+
+ if (start < 0)
start = 0;
- }
+
if (nCount == -1) {
nCount = m_charList.GetSize() - start;
return m_TextBuf.GetWideString().Mid(start,
@@ -610,15 +600,9 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
return m_TextBuf.GetWideString().Mid(startindex, nCount);
}
int CPDF_TextPage::CountRects(int start, int nCount) {
- if (m_ParseOptions.m_bGetCharCodeOnly) {
- return -1;
- }
- if (!m_IsParsered) {
+ if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0)
return -1;
- }
- if (start < 0) {
- return -1;
- }
+
if (nCount == -1 || nCount + start > m_charList.GetSize()) {
nCount = m_charList.GetSize() - start;
}
@@ -631,12 +615,12 @@ void CPDF_TextPage::GetRect(int rectIndex,
FX_FLOAT& top,
FX_FLOAT& right,
FX_FLOAT& bottom) const {
- if (m_ParseOptions.m_bGetCharCodeOnly) {
+ if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
return;
- }
- if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) {
+
+ if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize())
return;
- }
+
left = m_SelRects.GetAt(rectIndex).left;
top = m_SelRects.GetAt(rectIndex).top;
right = m_SelRects.GetAt(rectIndex).right;
@@ -703,12 +687,12 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect,
return GetBaselineRotate(start, end, Rotate);
}
FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) {
- if (m_ParseOptions.m_bGetCharCodeOnly) {
+ if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
return FALSE;
- }
- if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) {
+
+ if (rectIndex < 0 || rectIndex > m_SelRects.GetSize())
return FALSE;
- }
+
CFX_FloatRect rect = m_SelRects.GetAt(rectIndex);
return GetBaselineRotate(rect, Rotate);
}
@@ -717,13 +701,13 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left,
FX_FLOAT right,
FX_FLOAT bottom,
FX_BOOL bContains) {
- if (m_ParseOptions.m_bGetCharCodeOnly) {
+ if (m_ParseOptions.m_bGetCharCodeOnly)
return -1;
- }
+
m_Segment.RemoveAll();
- if (!m_IsParsered) {
+ if (!m_bIsParsed)
return -1;
- }
+
CFX_FloatRect rect(left, bottom, right, top);
rect.Normalize();
int nCount = m_charList.GetSize();
@@ -803,18 +787,15 @@ void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const {
count = m_Segment.GetAt(index).m_nCount;
}
int CPDF_TextPage::GetWordBreak(int index, int direction) const {
- if (m_ParseOptions.m_bGetCharCodeOnly) {
+ if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed)
return -1;
- }
- if (!m_IsParsered) {
- return -1;
- }
- if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) {
+
+ if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT)
return -1;
- }
- if (index < 0 || index >= m_charList.GetSize()) {
+
+ if (index < 0 || index >= m_charList.GetSize())
return -1;
- }
+
PAGECHAR_INFO charinfo;
charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index);
if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) {
@@ -2556,24 +2537,30 @@ int CPDF_TextPageFind::GetMatchedCount() const {
int resEnd = GetCharIndex(m_resEnd);
return resEnd - resStart + 1;
}
-CPDF_LinkExtract::CPDF_LinkExtract() : m_pTextPage(NULL), m_IsParserd(FALSE) {}
+
+CPDF_LinkExtract::CPDF_LinkExtract()
+ : m_pTextPage(nullptr), m_bIsParsed(false) {
+}
+
CPDF_LinkExtract::~CPDF_LinkExtract() {
DeleteLinkList();
}
+
FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) {
- if (!pTextPage || !pTextPage->IsParsed()) {
+ if (!pTextPage || !pTextPage->IsParsed())
return FALSE;
- }
+
m_pTextPage = (const CPDF_TextPage*)pTextPage;
m_strPageText = m_pTextPage->GetPageText(0, -1);
DeleteLinkList();
if (m_strPageText.IsEmpty()) {
return FALSE;
}
- parserLink();
- m_IsParserd = TRUE;
+ ParseLink();
+ m_bIsParsed = true;
return TRUE;
}
+
void CPDF_LinkExtract::DeleteLinkList() {
while (m_LinkList.GetSize()) {
CPDF_LinkExt* linkinfo = NULL;
@@ -2584,12 +2571,12 @@ void CPDF_LinkExtract::DeleteLinkList() {
m_LinkList.RemoveAll();
}
int CPDF_LinkExtract::CountLinks() const {
- if (!m_IsParserd) {
+ if (!m_bIsParsed) {
return -1;
}
return m_LinkList.GetSize();
}
-void CPDF_LinkExtract::parserLink() {
+void CPDF_LinkExtract::ParseLink() {
int start = 0, pos = 0;
int TotalChar = m_pTextPage->CountChars();
while (pos < TotalChar) {
@@ -2741,7 +2728,7 @@ void CPDF_LinkExtract::AppendToLinkList(int start,
}
CFX_WideString CPDF_LinkExtract::GetURL(int index) const {
- if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
+ if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
return L"";
}
CPDF_LinkExt* link = NULL;
@@ -2754,7 +2741,7 @@ CFX_WideString CPDF_LinkExtract::GetURL(int index) const {
void CPDF_LinkExtract::GetBoundedSegment(int index,
int& start,
int& count) const {
- if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
+ if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
return;
}
CPDF_LinkExt* link = NULL;
@@ -2766,7 +2753,7 @@ void CPDF_LinkExtract::GetBoundedSegment(int index,
count = link->m_Count;
}
void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const {
- if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) {
+ if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) {
return;
}
CPDF_LinkExt* link = NULL;
diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h
index 59332e7d07..b5a7734e54 100644
--- a/core/src/fpdftext/text_int.h
+++ b/core/src/fpdftext/text_int.h
@@ -60,7 +60,7 @@ class CPDF_TextPage : public IPDF_TextPage {
// IPDF_TextPage
FX_BOOL ParseTextPage() override;
void NormalizeObjects(FX_BOOL bNormalize) override;
- bool IsParsed() const override { return m_IsParsered; }
+ bool IsParsed() const override { return m_bIsParsed; }
int CharIndexFromTextIndex(int TextIndex) const override;
int TextIndexFromCharIndex(int CharIndex) const override;
int CountChars() const override;
@@ -129,20 +129,18 @@ class CPDF_TextPage : public IPDF_TextPage {
int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj);
int32_t FindTextlineFlowDirection();
- protected:
CPDFText_ParseOptions m_ParseOptions;
CFX_WordArray m_CharIndex;
- const CPDF_PageObjects* m_pPage;
+ const CPDF_PageObjects* const m_pPage;
PAGECHAR_InfoArray m_charList;
CFX_WideTextBuf m_TextBuf;
PAGECHAR_InfoArray m_TempCharList;
CFX_WideTextBuf m_TempTextBuf;
- int m_parserflag;
+ const int m_parserflag;
CPDF_TextObject* m_pPreTextObj;
CFX_AffineMatrix m_perMatrix;
- FX_BOOL m_IsParsered;
+ bool m_bIsParsed;
CFX_AffineMatrix m_DisplayMatrix;
-
SEGMENT_Array m_Segment;
CFX_RectArray m_SelRects;
LINEOBJ m_LineObj;
@@ -152,7 +150,7 @@ class CPDF_TextPage : public IPDF_TextPage {
class CPDF_TextPageFind : public IPDF_TextPageFind {
public:
- CPDF_TextPageFind(const IPDF_TextPage* pTextPage);
+ explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage);
~CPDF_TextPageFind() override {}
// IPDF_TextPageFind
@@ -220,10 +218,10 @@ class CPDF_LinkExtract : public IPDF_LinkExtract {
void GetBoundedSegment(int index, int& start, int& count) const override;
void GetRects(int index, CFX_RectArray& rects) const override;
- FX_BOOL IsExtract() const { return m_IsParserd; }
+ FX_BOOL IsExtract() const { return m_bIsParsed; }
protected:
- void parserLink();
+ void ParseLink();
void DeleteLinkList();
FX_BOOL CheckWebLink(CFX_WideString& strBeCheck);
FX_BOOL CheckMailLink(CFX_WideString& str);
@@ -233,7 +231,7 @@ class CPDF_LinkExtract : public IPDF_LinkExtract {
LINK_InfoArray m_LinkList;
const CPDF_TextPage* m_pTextPage;
CFX_WideString m_strPageText;
- FX_BOOL m_IsParserd;
+ bool m_bIsParsed;
};
FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst);