From 691411873cb16eb82d5912d0f73b96310f632932 Mon Sep 17 00:00:00 2001 From: tsepez Date: Thu, 21 Apr 2016 10:43:39 -0700 Subject: Remove CFX_ArrayTemplate from CPDF_LinkExtract Use unique_ptrs while we're at it, also better ctor. Review URL: https://codereview.chromium.org/1896303002 --- core/fpdftext/fpdf_text_int.cpp | 184 ++++++++----------------------- core/fpdftext/fpdf_text_int_unittest.cpp | 3 + core/fpdftext/include/cpdf_linkextract.h | 40 +++---- core/fpdftext/include/cpdf_textpage.h | 6 +- fpdfsdk/fpdftext.cpp | 58 +++++----- public/fpdf_text.h | 30 +++-- 6 files changed, 114 insertions(+), 207 deletions(-) diff --git a/core/fpdftext/fpdf_text_int.cpp b/core/fpdftext/fpdf_text_int.cpp index 741331fb77..4db4d5c09f 100644 --- a/core/fpdftext/fpdf_text_int.cpp +++ b/core/fpdftext/fpdf_text_int.cpp @@ -262,7 +262,7 @@ int CPDF_TextPage::TextIndexFromCharIndex(int CharIndex) const { void CPDF_TextPage::GetRectArray(int start, int nCount, - CFX_RectArray& rectArray) const { + CFX_RectArray* rectArray) const { if (start < 0 || nCount == 0) { return; } @@ -290,7 +290,7 @@ void CPDF_TextPage::GetRectArray(int start, pCurObj = info_curchar.m_pTextObj; } if (pCurObj != info_curchar.m_pTextObj) { - rectArray.Add(rect); + rectArray->Add(rect); pCurObj = info_curchar.m_pTextObj; flagNewRect = TRUE; } @@ -343,7 +343,7 @@ void CPDF_TextPage::GetRectArray(int start, } } } - rectArray.Add(rect); + rectArray->Add(rect); } int CPDF_TextPage::GetIndexAtPos(CFX_FloatPoint point, @@ -592,7 +592,7 @@ int CPDF_TextPage::CountRects(int start, int nCount) { nCount = pdfium::CollectionSize<int>(m_CharList) - start; } m_SelRects.RemoveAll(); - GetRectArray(start, nCount, m_SelRects); + GetRectArray(start, nCount, &m_SelRects); return m_SelRects.GetSize(); } @@ -649,35 +649,6 @@ FX_BOOL CPDF_TextPage::GetBaselineRotate(int start, int end, int& Rotate) { return TRUE; } -FX_BOOL CPDF_TextPage::GetBaselineRotate(const CFX_FloatRect& rect, - int& Rotate) { - int start, end, count, - n = 
CountBoundedSegments(rect.left, rect.top, rect.right, rect.bottom, - TRUE); - if (n < 1) { - return FALSE; - } - if (n > 1) { - GetBoundedSegment(n - 1, start, count); - end = start + count - 1; - GetBoundedSegment(0, start, count); - } else { - GetBoundedSegment(0, start, count); - end = start + count - 1; - } - return GetBaselineRotate(start, end, Rotate); -} -FX_BOOL CPDF_TextPage::GetBaselineRotate(int rectIndex, int& Rotate) { - if (!m_bIsParsed) - return FALSE; - - if (rectIndex < 0 || rectIndex >= m_SelRects.GetSize()) - return FALSE; - - CFX_FloatRect rect = m_SelRects.GetAt(rectIndex); - return GetBaselineRotate(rect, Rotate); -} - int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, @@ -756,14 +727,6 @@ int CPDF_TextPage::CountBoundedSegments(FX_FLOAT left, return m_Segments.GetSize(); } -void CPDF_TextPage::GetBoundedSegment(int index, int& start, int& count) const { - if (index < 0 || index >= m_Segments.GetSize()) { - return; - } - start = m_Segments.GetAt(index).m_Start; - count = m_Segments.GetAt(index).m_nCount; -} - int CPDF_TextPage::GetWordBreak(int index, int direction) const { if (!m_bIsParsed) return -1; @@ -2126,7 +2089,7 @@ FX_BOOL CPDF_TextPageFind::FindNext() { m_IsFind = TRUE; int resStart = GetCharIndex(m_resStart); int resEnd = GetCharIndex(m_resEnd); - m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, m_resArray); + m_pTextPage->GetRectArray(resStart, resEnd - resStart + 1, &m_resArray); if (m_flags & FPDFTEXT_CONSECUTIVE) { m_findNextStart = m_resStart + 1; m_findPreStart = m_resEnd - 1; @@ -2172,7 +2135,7 @@ FX_BOOL CPDF_TextPageFind::FindPrev() { m_resStart = m_pTextPage->TextIndexFromCharIndex(order); m_resEnd = m_pTextPage->TextIndexFromCharIndex(order + MatchedCount - 1); m_IsFind = TRUE; - m_pTextPage->GetRectArray(order, MatchedCount, m_resArray); + m_pTextPage->GetRectArray(order, MatchedCount, &m_resArray); if (m_flags & FPDFTEXT_CONSECUTIVE) { m_findNextStart = m_resStart + 1; 
m_findPreStart = m_resEnd - 1; @@ -2330,43 +2293,22 @@ int CPDF_TextPageFind::GetMatchedCount() const { return resEnd - resStart + 1; } -CPDF_LinkExtract::CPDF_LinkExtract() - : m_pTextPage(nullptr), m_bIsParsed(false) {} +CPDF_LinkExtract::CPDF_LinkExtract(const CPDF_TextPage* pTextPage) + : m_pTextPage(pTextPage) {} CPDF_LinkExtract::~CPDF_LinkExtract() { - DeleteLinkList(); } -FX_BOOL CPDF_LinkExtract::ExtractLinks(const CPDF_TextPage* pTextPage) { - if (!pTextPage || !pTextPage->IsParsed()) - return FALSE; +void CPDF_LinkExtract::ExtractLinks() { + m_LinkArray.clear(); + if (!m_pTextPage->IsParsed()) + return; - m_pTextPage = (const CPDF_TextPage*)pTextPage; m_strPageText = m_pTextPage->GetPageText(0, -1); - DeleteLinkList(); - if (m_strPageText.IsEmpty()) { - return FALSE; - } - ParseLink(); - m_bIsParsed = true; - return TRUE; -} - -void CPDF_LinkExtract::DeleteLinkList() { - while (m_LinkList.GetSize()) { - CPDF_LinkExt* linkinfo = NULL; - linkinfo = m_LinkList.GetAt(0); - m_LinkList.RemoveAt(0); - delete linkinfo; - } - m_LinkList.RemoveAll(); -} + if (m_strPageText.IsEmpty()) + return; -int CPDF_LinkExtract::CountLinks() const { - if (!m_bIsParsed) { - return -1; - } - return m_LinkList.GetSize(); + ParseLink(); } void CPDF_LinkExtract::ParseLink() { @@ -2395,7 +2337,7 @@ void CPDF_LinkExtract::ParseLink() { } if (nCount > 5 && (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) { - AppendToLinkList(start, nCount, strBeCheck); + m_LinkArray.push_back({start, nCount, strBeCheck}); } } start = ++pos; @@ -2405,47 +2347,46 @@ void CPDF_LinkExtract::ParseLink() { } } -FX_BOOL CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { +bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) { CFX_WideString str = strBeCheck; str.MakeLower(); if (str.Find(L"http://www.") != -1) { strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"http://www.")); - return TRUE; + return true; } if (str.Find(L"http://") != -1) { strBeCheck = 
strBeCheck.Right(str.GetLength() - str.Find(L"http://")); - return TRUE; + return true; } if (str.Find(L"https://www.") != -1) { strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://www.")); - return TRUE; + return true; } if (str.Find(L"https://") != -1) { strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"https://")); - return TRUE; + return true; } if (str.Find(L"www.") != -1) { strBeCheck = strBeCheck.Right(str.GetLength() - str.Find(L"www.")); strBeCheck = L"http://" + strBeCheck; - return TRUE; + return true; } - return FALSE; + return false; } bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { int aPos = str.Find(L'@'); // Invalid when no '@'. - if (aPos < 1) { - return FALSE; - } + if (aPos < 1) + return false; // Check the local part. int pPos = aPos; // Used to track the position of '@' or '.'. for (int i = aPos - 1; i >= 0; i--) { FX_WCHAR ch = str.GetAt(i); - if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) { + if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) continue; - } + if (ch != L'.' || i == pPos - 1 || i == 0) { if (i == aPos - 1) { // There is '.' or invalid char before '@'. @@ -2463,25 +2404,25 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { // Check the domain name part. aPos = str.Find(L'@'); - if (aPos < 1) { - return FALSE; - } + if (aPos < 1) + return false; + str.TrimRight(L'.'); // At least one '.' in domain name, but not at the beginning. // TODO(weili): RFC5322 allows domain names to be a local name without '.'. // Check whether we should remove this check. int ePos = str.Find(L'.', aPos + 1); - if (ePos == -1 || ePos == aPos + 1) { - return FALSE; - } + if (ePos == -1 || ePos == aPos + 1) + return false; + // Validate all other chars in domain name. int nLen = str.GetLength(); pPos = 0; // Used to track the position of '.'. 
for (int i = aPos + 1; i < nLen; i++) { FX_WCHAR wch = str.GetAt(i); - if (wch == L'-' || FXSYS_iswalnum(wch)) { + if (wch == L'-' || FXSYS_iswalnum(wch)) continue; - } + if (wch != L'.' || i == pPos + 1) { // Domain name should end before invalid char. int host_end = i == pPos + 1 ? i - 2 : i - 1; @@ -2490,61 +2431,24 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) { str = str.Left(host_end + 1); break; } - return FALSE; + return false; } pPos = i; } - if (str.Find(L"mailto:") == -1) { + if (str.Find(L"mailto:") == -1) str = L"mailto:" + str; - } - return TRUE; -} -void CPDF_LinkExtract::AppendToLinkList(int start, - int count, - const CFX_WideString& strUrl) { - CPDF_LinkExt* linkInfo = new CPDF_LinkExt; - linkInfo->m_strUrl = strUrl; - linkInfo->m_Start = start; - linkInfo->m_Count = count; - m_LinkList.Add(linkInfo); + return true; } -CFX_WideString CPDF_LinkExtract::GetURL(int index) const { - if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { - return L""; - } - CPDF_LinkExt* link = NULL; - link = m_LinkList.GetAt(index); - if (!link) { - return L""; - } - return link->m_strUrl; -} -void CPDF_LinkExtract::GetBoundedSegment(int index, - int& start, - int& count) const { - if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { - return; - } - CPDF_LinkExt* link = NULL; - link = m_LinkList.GetAt(index); - if (!link) { - return; - } - start = link->m_Start; - count = link->m_Count; +CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const { + return index < m_LinkArray.size() ? 
m_LinkArray[index].m_strUrl : L""; } -void CPDF_LinkExtract::GetRects(int index, CFX_RectArray& rects) const { - if (!m_bIsParsed || index < 0 || index >= m_LinkList.GetSize()) { - return; - } - CPDF_LinkExt* link = NULL; - link = m_LinkList.GetAt(index); - if (!link) { - return; +void CPDF_LinkExtract::GetRects(size_t index, CFX_RectArray* pRects) const { + if (index < m_LinkArray.size()) { + m_pTextPage->GetRectArray(m_LinkArray[index].m_Start, + m_LinkArray[index].m_Count, pRects); } - m_pTextPage->GetRectArray(link->m_Start, link->m_Count, rects); } diff --git a/core/fpdftext/fpdf_text_int_unittest.cpp b/core/fpdftext/fpdf_text_int_unittest.cpp index e1dd0f7504..0ee11d2d57 100644 --- a/core/fpdftext/fpdf_text_int_unittest.cpp +++ b/core/fpdftext/fpdf_text_int_unittest.cpp @@ -8,6 +8,9 @@ // Class to help test functions in CPDF_LinkExtract class. class CPDF_TestLinkExtract : public CPDF_LinkExtract { + public: + CPDF_TestLinkExtract() : CPDF_LinkExtract(nullptr) {} + private: // Add test cases as friends to access protected member functions. // Access CheckMailLink. 
diff --git a/core/fpdftext/include/cpdf_linkextract.h b/core/fpdftext/include/cpdf_linkextract.h index 263768ee5d..4f9537c799 100644 --- a/core/fpdftext/include/cpdf_linkextract.h +++ b/core/fpdftext/include/cpdf_linkextract.h @@ -7,6 +7,8 @@ #ifndef CORE_FPDFTEXT_INCLUDE_CPDF_LINKEXTRACT_H_ #define CORE_FPDFTEXT_INCLUDE_CPDF_LINKEXTRACT_H_ +#include <vector> + #include "core/fxcrt/include/fx_basic.h" #include "core/fxcrt/include/fx_coordinates.h" #include "core/fxcrt/include/fx_string.h" @@ -14,41 +16,31 @@ class CPDF_TextPage; -class CPDF_LinkExt { - public: - CPDF_LinkExt() {} - ~CPDF_LinkExt() {} - - int m_Start; - int m_Count; - CFX_WideString m_strUrl; -}; - class CPDF_LinkExtract { public: - CPDF_LinkExtract(); + explicit CPDF_LinkExtract(const CPDF_TextPage* pTextPage); ~CPDF_LinkExtract(); - FX_BOOL ExtractLinks(const CPDF_TextPage* pTextPage); - int CountLinks() const; - CFX_WideString GetURL(int index) const; - void GetBoundedSegment(int index, int& start, int& count) const; - void GetRects(int index, CFX_RectArray& rects) const; - - FX_BOOL IsExtract() const { return m_bIsParsed; } + void ExtractLinks(); + size_t CountLinks() const { return m_LinkArray.size(); } + CFX_WideString GetURL(size_t index) const; + void GetRects(size_t index, CFX_RectArray* pRects) const; protected: void ParseLink(); - void DeleteLinkList(); - FX_BOOL CheckWebLink(CFX_WideString& strBeCheck); + bool CheckWebLink(CFX_WideString& str); bool CheckMailLink(CFX_WideString& str); - void AppendToLinkList(int start, int count, const CFX_WideString& strUrl); private: - CFX_ArrayTemplate<CPDF_LinkExt*> m_LinkList; - const CPDF_TextPage* m_pTextPage; + struct Link { + int m_Start; + int m_Count; + CFX_WideString m_strUrl; + }; + + const CPDF_TextPage* const m_pTextPage; CFX_WideString m_strPageText; - bool m_bIsParsed; + std::vector<Link> m_LinkArray; }; #endif // CORE_FPDFTEXT_INCLUDE_CPDF_LINKEXTRACT_H_ diff --git a/core/fpdftext/include/cpdf_textpage.h b/core/fpdftext/include/cpdf_textpage.h index 
19e8791b5a..8d608b0bf0 100644 --- a/core/fpdftext/include/cpdf_textpage.h +++ b/core/fpdftext/include/cpdf_textpage.h @@ -67,7 +67,7 @@ class CPDF_TextPage { int TextIndexFromCharIndex(int CharIndex) const; int CountChars() const; void GetCharInfo(int index, FPDF_CHAR_INFO* info) const; - void GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const; + void GetRectArray(int start, int nCount, CFX_RectArray* rectArray) const; int GetIndexAtPos(CFX_FloatPoint point, FX_FLOAT xTolerance, FX_FLOAT yTolerance) const; @@ -85,14 +85,12 @@ class CPDF_TextPage { FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT& bottom) const; - FX_BOOL GetBaselineRotate(int rectIndex, int& Rotate); - FX_BOOL GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate); int CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE); - void GetBoundedSegment(int index, int& start, int& count) const; + int GetWordBreak(int index, int direction) const; static FX_BOOL IsRectIntersect(const CFX_FloatRect& rect1, diff --git a/fpdfsdk/fpdftext.cpp b/fpdfsdk/fpdftext.cpp index 007e18274e..2a84131422 100644 --- a/fpdfsdk/fpdftext.cpp +++ b/fpdfsdk/fpdftext.cpp @@ -12,6 +12,7 @@ #include "core/fpdftext/include/cpdf_textpage.h" #include "core/fpdftext/include/cpdf_textpagefind.h" #include "fpdfsdk/include/fsdk_define.h" +#include "third_party/base/numerics/safe_conversions.h" #ifdef PDF_ENABLE_XFA #include "fpdfsdk/fpdfxfa/include/fpdfxfa_doc.h" @@ -273,8 +274,9 @@ DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) { if (!text_page) return nullptr; - CPDF_LinkExtract* pageLink = new CPDF_LinkExtract; - pageLink->ExtractLinks(CPDFTextPageFromFPDFTextPage(text_page)); + CPDF_LinkExtract* pageLink = + new CPDF_LinkExtract(CPDFTextPageFromFPDFTextPage(text_page)); + pageLink->ExtractLinks(); return pageLink; } @@ -283,42 +285,40 @@ DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) { return 0; 
CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); - return pageLink->CountLinks(); + return pdfium::base::checked_cast<int>(pageLink->CountLinks()); } DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, unsigned short* buffer, int buflen) { - if (!link_page) - return 0; - - CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); - CFX_WideString url = pageLink->GetURL(link_index); - - CFX_ByteString cbUTF16URL = url.UTF16LE_Encode(); - int len = cbUTF16URL.GetLength() / sizeof(unsigned short); + CFX_WideString wsUrl(L""); + if (link_page && link_index >= 0) { + CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); + wsUrl = pageLink->GetURL(link_index); + } + CFX_ByteString cbUTF16URL = wsUrl.UTF16LE_Encode(); + int required = cbUTF16URL.GetLength() / sizeof(unsigned short); if (!buffer || buflen <= 0) - return len; + return required; - int size = len < buflen ? len : buflen; + int size = std::min(required, buflen); if (size > 0) { int buf_size = size * sizeof(unsigned short); FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(buf_size), buf_size); - cbUTF16URL.ReleaseBuffer(buf_size); } return size; } DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_index) { - if (!link_page) + if (!link_page || link_index < 0) return 0; + CFX_RectArray rects; CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); - CFX_RectArray rectArray; - pageLink->GetRects(link_index, rectArray); - return rectArray.GetSize(); + pageLink->GetRects(link_index, &rects); + return rects.GetSize(); } DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, @@ -328,20 +328,22 @@ DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, double* top, double* right, double* bottom) { - if (!link_page) + if (!link_page || link_index < 0 || rect_index < 0) return; - CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); CFX_RectArray rectArray; - 
pageLink->GetRects(link_index, rectArray); - if (rect_index >= 0 && rect_index < rectArray.GetSize()) { - CFX_FloatRect rect = rectArray.GetAt(rect_index); - *left = rect.left; - *right = rect.right; - *top = rect.top; - *bottom = rect.bottom; - } + CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); + pageLink->GetRects(link_index, &rectArray); + if (rect_index >= rectArray.GetSize()) + return; + + CFX_FloatRect rect = rectArray.GetAt(rect_index); + *left = rect.left; + *right = rect.right; + *top = rect.top; + *bottom = rect.bottom; } + DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) { delete CPDFLinkExtractFromFPDFPageLink(link_page); } diff --git a/public/fpdf_text.h b/public/fpdf_text.h index 32cc131859..2bf574b12f 100644 --- a/public/fpdf_text.h +++ b/public/fpdf_text.h @@ -351,13 +351,19 @@ DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page); // Parameters: // link_page - Handle returned by FPDFLink_LoadWebLinks. // link_index - Zero-based index for the link. -// buffer - A unicode buffer. +// buffer - A unicode buffer for the result. // buflen - Number of characters (not bytes) for the buffer, -// including an additional terminator. +// including an additional terminator. // Return Value: -// If buffer is NULL or buflen is zero, return number of characters -// (not bytes and an additional terminator is also counted) needed, -// otherwise, return number of characters copied into the buffer. +// If |buffer| is NULL or |buflen| is zero, return the number of +// characters (not bytes) needed to buffer the result (an additional +// terminator is included in this count). +// Otherwise, copy the result into |buffer|, truncating at |buflen| if +// the result is too large to fit, and return the number of characters +// actually copied into the buffer (the additional terminator is also +// included in this count). 
+// If |link_index| does not correspond to a valid link, then the result +// is an empty string. // DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, @@ -370,7 +376,8 @@ DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, // link_page - Handle returned by FPDFLink_LoadWebLinks. // link_index - Zero-based index for the link. // Return Value: -// Number of rectangular areas for the link. +// Number of rectangular areas for the link. If |link_index| does +// not correspond to a valid link, then 0 is returned. // DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_index); @@ -382,15 +389,16 @@ DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, // link_index - Zero-based index for the link. // rect_index - Zero-based index for a rectangle. // left - Pointer to a double value receiving the rectangle -// left boundary. +// left boundary. // top - Pointer to a double value receiving the rectangle -// top boundary. +// top boundary. // right - Pointer to a double value receiving the rectangle -// right boundary. +// right boundary. // bottom - Pointer to a double value receiving the rectangle -// bottom boundary. +// bottom boundary. // Return Value: -// None. +// None. If |link_index| does not correspond to a valid link, then +// |left|, |top|, |right|, and |bottom| remain unmodified. // DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, -- cgit v1.2.3