diff options
Diffstat (limited to 'core/src/fpdftext')
-rw-r--r-- | core/src/fpdftext/fpdf_text_int.cpp | 159 | ||||
-rw-r--r-- | core/src/fpdftext/text_int.h | 18 |
2 files changed, 81 insertions, 96 deletions
diff --git a/core/src/fpdftext/fpdf_text_int.cpp b/core/src/fpdftext/fpdf_text_int.cpp index 1b476d7b1f..aa25728c15 100644 --- a/core/src/fpdftext/fpdf_text_int.cpp +++ b/core/src/fpdftext/fpdf_text_int.cpp @@ -96,10 +96,7 @@ IPDF_TextPage* IPDF_TextPage::CreateTextPage(const CPDF_PageObjects* pObjs, } IPDF_TextPageFind* IPDF_TextPageFind::CreatePageFind( const IPDF_TextPage* pTextPage) { - if (!pTextPage) { - return NULL; - } - return new CPDF_TextPageFind(pTextPage); + return pTextPage ? new CPDF_TextPageFind(pTextPage) : nullptr; } IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { return new CPDF_LinkExtract(); @@ -112,43 +109,46 @@ IPDF_LinkExtract* IPDF_LinkExtract::CreateLinkExtract() { #define TEXT_RETURN_LINEFEED L"\r\n" #define TEXT_LINEFEED L"\n" #define TEXT_CHARRATIO_GAPDELTA 0.070 + CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, int flags) - : m_charList(512), + : m_pPage(pPage), + m_charList(512), m_TempCharList(50), - m_pPreTextObj(NULL), - m_IsParsered(FALSE), + m_parserflag(flags), + m_pPreTextObj(nullptr), + m_bIsParsed(false), m_TextlineDir(-1), m_CurlineRect(0, 0, 0, 0) { - m_pPage = pPage; - m_parserflag = flags; m_TextBuf.EstimateSize(0, 10240); pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), (int)pPage->GetPageHeight(), 0); } + CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions) : m_ParseOptions(ParserOptions), + m_pPage(pPage), m_charList(512), m_TempCharList(50), - m_pPreTextObj(NULL), - m_IsParsered(FALSE), + m_parserflag(0), + m_pPreTextObj(nullptr), + m_bIsParsed(false), m_TextlineDir(-1), m_CurlineRect(0, 0, 0, 0) { - m_pPage = pPage; - m_parserflag = 0; m_TextBuf.EstimateSize(0, 10240); pPage->GetDisplayMatrix(m_DisplayMatrix, 0, 0, (int)pPage->GetPageWidth(), (int)pPage->GetPageHeight(), 0); } + CPDF_TextPage::CPDF_TextPage(const CPDF_PageObjects* pPage, int flags) - : m_charList(512), + : m_pPage(pPage), + m_charList(512), m_TempCharList(50), - m_pPreTextObj(NULL), - m_IsParsered(FALSE), + m_parserflag(flags), + m_pPreTextObj(nullptr), + m_bIsParsed(false), m_TextlineDir(-1), m_CurlineRect(0, 0, 0, 0) { - m_pPage = pPage; - m_parserflag = flags; m_TextBuf.EstimateSize(0, 10240); CFX_FloatRect pageRect = pPage->CalcBoundingBox(); m_DisplayMatrix = CFX_AffineMatrix(1, 0, 0, -1, pageRect.right, pageRect.top); @@ -172,16 +172,15 @@ bool CPDF_TextPage::IsControlChar(const PAGECHAR_INFO& charInfo) { } } FX_BOOL CPDF_TextPage::ParseTextPage() { - if (!m_pPage) { - m_IsParsered = FALSE; + m_bIsParsed = false; + if (!m_pPage) return FALSE; - } - m_IsParsered = FALSE; + m_TextBuf.Clear(); m_charList.RemoveAll(); m_pPreTextObj = NULL; ProcessObject(); - m_IsParsered = TRUE; + m_bIsParsed = true; if (!m_ParseOptions.m_bGetCharCodeOnly) { m_CharIndex.RemoveAll(); int nCount = m_charList.GetSize(); @@ -269,7 +268,7 @@ void CPDF_TextPage::GetRectArray(int start, if (start < 0 || nCount == 0) { return; } - if (!m_IsParsered) { + if (!m_bIsParsed) { return; } PAGECHAR_INFO info_curchar; @@ -352,12 +351,9 @@ void CPDF_TextPage::GetRectArray(int start, int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, FX_FLOAT xTolerance, FX_FLOAT yTolerance) const { - if (m_ParseOptions.m_bGetCharCodeOnly) { - return -3; - } - if (!m_IsParsered) { + if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) return -3; - } + int pos = 0; int NearPos = -1; double xdif = 5000, ydif = 5000; @@ -400,9 +396,9 @@ int CPDF_TextPage::GetIndexAtPos(CPDF_Point point, } CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { CFX_WideString strText; - if (m_ParseOptions.m_bGetCharCodeOnly || !m_IsParsered) { + if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) return strText; - } + int nCount = m_charList.GetSize(); int pos = 0; FX_FLOAT posy = 0; @@ -438,12 +434,9 @@ CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { } void CPDF_TextPage::GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const { - if (m_ParseOptions.m_bGetCharCodeOnly) { + if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) return; - } - if (!m_IsParsered) { - return; - } + CFX_FloatRect curRect; FX_BOOL flagNewRect = TRUE; CPDF_TextObject* pCurObj = NULL; @@ -498,15 +491,12 @@ int CPDF_TextPage::GetIndexAtPos(FX_FLOAT x, return GetIndexAtPos(point, xTolerance, yTolerance); } void CPDF_TextPage::GetCharInfo(int index, FPDF_CHAR_INFO& info) const { - if (m_ParseOptions.m_bGetCharCodeOnly) { + if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) return; - } - if (!m_IsParsered) { - return; - } - if (index < 0 || index >= m_charList.GetSize()) { + + if (index < 0 || index >= m_charList.GetSize()) return; - } + PAGECHAR_INFO charinfo; charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); info.m_Charcode = charinfo.m_CharCode; @@ -561,12 +551,12 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, } } CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { - if (!m_IsParsered || nCount == 0) { + if (!m_bIsParsed || nCount == 0) return L""; - } - if (start < 0) { + + if (start < 0) start = 0; - } + if (nCount == -1) { nCount = m_charList.GetSize() - start; return m_TextBuf.GetWideString().Mid(start, @@ -610,15 +600,9 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { return m_TextBuf.GetWideString().Mid(startindex, nCount); } int CPDF_TextPage::CountRects(int start, int nCount) { - if (m_ParseOptions.m_bGetCharCodeOnly) { - return -1; - } - if (!m_IsParsered) { + if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed || start < 0) return -1; - } - if (start < 0) { - return -1; - } + if (nCount == -1 || nCount + start > m_charList.GetSize()) { nCount = m_charList.GetSize() - start; } @@ -631,12 +615,12 @@ void CPDF_TextPage::GetRect(int rectIndex, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT& bottom) const { - if (m_ParseOptions.m_bGetCharCodeOnly) { + if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) return; - } - if (!m_IsParsered || rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) { + + if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) return; - } + left = m_SelRects.GetAt(rectIndex).left; top = m_SelRects.GetAt(rectIndex).top; right = m_SelRects.GetAt(rectIndex).right; @@ -703,12 +687,12 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, return GetBaselineRotate(start, end, Rotate); } FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { - if (m_ParseOptions.m_bGetCharCodeOnly) { + if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) return FALSE; - } - if (!m_IsParsered || rectIndex < 0 || rectIndex > m_SelRects.GetSize()) { + + if (rectIndex < 0 || rectIndex > m_SelRects.GetSize()) return FALSE; - } + CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); return GetBaselineRotate(rect, Rotate); } @@ -717,13 +701,13 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains) { - if (m_ParseOptions.m_bGetCharCodeOnly) { + if (m_ParseOptions.m_bGetCharCodeOnly) return -1; - } + m_Segment.RemoveAll(); - if (!m_IsParsered) { + if (!m_bIsParsed) return -1; - } + CFX_FloatRect rect(left, bottom, right, top); rect.Normalize(); int nCount = m_charList.GetSize(); @@ -803,18 +787,15 @@ void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { count = m_Segment.GetAt(index).m_nCount; } int CPDF_TextPage::GetWordBreak(int index, int direction) const { - if (m_ParseOptions.m_bGetCharCodeOnly) { + if (m_ParseOptions.m_bGetCharCodeOnly || !m_bIsParsed) return -1; - } - if (!m_IsParsered) { - return -1; - } - if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) { + + if (direction != FPDFTEXT_LEFT && direction != FPDFTEXT_RIGHT) return -1; - } - if (index < 0 || index >= m_charList.GetSize()) { + + if (index < 0 || index >= m_charList.GetSize()) return -1; - } + PAGECHAR_INFO charinfo; charinfo = *(PAGECHAR_INFO*)m_charList.GetAt(index); if (charinfo.m_Index == -1 || charinfo.m_Flag == FPDFTEXT_CHAR_GENERATED) { @@ -2556,24 +2537,30 @@ int CPDF_TextPageFind::GetMatchedCount() const { int resEnd = GetCharIndex(m_resEnd); return resEnd - resStart + 1; } -CPDF_LinkExtract::CPDF_LinkExtract() : m_pTextPage(NULL), m_IsParserd(FALSE) {} + +CPDF_LinkExtract::CPDF_LinkExtract() + : m_pTextPage(nullptr), m_bIsParsed(false) { +} + CPDF_LinkExtract::~CPDF_LinkExtract() { DeleteLinkList(); } + FX_BOOL CPDF_LinkExtract::ExtractLinks(const IPDF_TextPage* pTextPage) { - if (!pTextPage || !pTextPage->IsParsed()) { + if (!pTextPage || !pTextPage->IsParsed()) return FALSE; - } + m_pTextPage = (const CPDF_TextPage*)pTextPage; m_strPageText = m_pTextPage->GetPageText(0, -1); DeleteLinkList(); if (m_strPageText.IsEmpty()) { return FALSE; } - parserLink(); - m_IsParserd = TRUE; + ParseLink(); + m_bIsParsed = true; return TRUE; } + void CPDF_LinkExtract::DeleteLinkList() { while (m_LinkList.GetSize()) { CPDF_LinkExt* linkinfo = NULL; @@ -2584,12 +2571,12 @@ void CPDF_LinkExtract::DeleteLinkList() { m_LinkList.RemoveAll(); } int CPDF_LinkExtract::CountLinks() const { - if (!m_IsParserd) { + if (!m_bIsParsed) { return -1; } return m_LinkList.GetSize(); } -void CPDF_LinkExtract::parserLink() { +void CPDF_LinkExtract::ParseLink() { int start = 0, pos = 0; int TotalChar = m_pTextPage->CountChars(); while (pos < TotalChar) { @@ -2741,7 +2728,7 @@ void CPDF_LinkExtract::AppendToLinkList(int start, } CFX_WideString CPDF_LinkExtract::GetURL(int index) const { - if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { + if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { return L""; } CPDF_LinkExt* link = NULL; @@ -2754,7 +2741,7 @@ CFX_WideString CPDF_LinkExtract::GetURL(int index) const { void CPDF_LinkExtract::GetBoundedSegment(int index, int& start, int& count) const { - if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { + if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { return; } CPDF_LinkExt* link = NULL; @@ -2766,7 +2753,7 @@ void CPDF_LinkExtract::GetBoundedSegment(int index, count = link->m_Count; } void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { - if (!m_IsParserd || index < 0 || index >= m_LinkList.GetSize()) { + if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { return; } CPDF_LinkExt* link = NULL; diff --git a/core/src/fpdftext/text_int.h b/core/src/fpdftext/text_int.h index 59332e7d07..b5a7734e54 100644 --- a/core/src/fpdftext/text_int.h +++ b/core/src/fpdftext/text_int.h @@ -60,7 +60,7 @@ class CPDF_TextPage : public IPDF_TextPage { // IPDF_TextPage FX_BOOL ParseTextPage() override; void NormalizeObjects(FX_BOOL bNormalize) override; - bool IsParsed() const override { return m_IsParsered; } + bool IsParsed() const override { return m_bIsParsed; } int CharIndexFromTextIndex(int TextIndex) const override; int TextIndexFromCharIndex(int CharIndex) const override; int CountChars() const override; @@ -129,20 +129,18 @@ class CPDF_TextPage : public IPDF_TextPage { int32_t GetTextObjectWritingMode(const CPDF_TextObject* pTextObj); int32_t FindTextlineFlowDirection(); - protected: CPDFText_ParseOptions m_ParseOptions; CFX_WordArray m_CharIndex; - const CPDF_PageObjects* m_pPage; + const CPDF_PageObjects* const m_pPage; PAGECHAR_InfoArray m_charList; CFX_WideTextBuf m_TextBuf; PAGECHAR_InfoArray m_TempCharList; CFX_WideTextBuf m_TempTextBuf; - int m_parserflag; + const int m_parserflag; CPDF_TextObject* m_pPreTextObj; CFX_AffineMatrix m_perMatrix; - FX_BOOL m_IsParsered; + bool m_bIsParsed; CFX_AffineMatrix m_DisplayMatrix; - SEGMENT_Array m_Segment; CFX_RectArray m_SelRects; LINEOBJ m_LineObj; @@ -152,7 +150,7 @@ class CPDF_TextPage : public IPDF_TextPage { class CPDF_TextPageFind : public IPDF_TextPageFind { public: - CPDF_TextPageFind(const IPDF_TextPage* pTextPage); + explicit CPDF_TextPageFind(const IPDF_TextPage* pTextPage); ~CPDF_TextPageFind() override {} // IPDF_TextPageFind @@ -220,10 +218,10 @@ class CPDF_LinkExtract : public IPDF_LinkExtract { void GetBoundedSegment(int index, int& start, int& count) const override; void GetRects(int index, CFX_RectArray& rects) const override; - FX_BOOL IsExtract() const { return m_IsParserd; } + FX_BOOL IsExtract() const { return m_bIsParsed; } protected: - void parserLink(); + void ParseLink(); void DeleteLinkList(); FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); FX_BOOL CheckMailLink(CFX_WideString& str); @@ -233,7 +231,7 @@ class CPDF_LinkExtract : public IPDF_LinkExtract { LINK_InfoArray m_LinkList; const CPDF_TextPage* m_pTextPage; CFX_WideString m_strPageText; - FX_BOOL m_IsParserd; + bool m_bIsParsed; }; FX_STRSIZE FX_Unicode_GetNormalization(FX_WCHAR wch, FX_WCHAR* pDst); |