diff options
Diffstat (limited to 'core/fpdftext')
-rw-r--r-- | core/fpdftext/cpdf_linkextract.cpp | 50 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpage.cpp | 26 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpagefind.cpp | 45 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpagefind.h | 10 |
4 files changed, 63 insertions, 68 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp index 91116711e6..3a38343721 100644 --- a/core/fpdftext/cpdf_linkextract.cpp +++ b/core/fpdftext/cpdf_linkextract.cpp @@ -19,9 +19,7 @@ namespace { // |end|. The purpose of this function is to separate url from the surrounding // context characters, we do not intend to fully validate the url. |str| // contains lower case characters only. -FX_STRSIZE FindWebLinkEnding(const WideString& str, - FX_STRSIZE start, - FX_STRSIZE end) { +size_t FindWebLinkEnding(const WideString& str, size_t start, size_t end) { if (str.Contains(L'/', start)) { // When there is a path and query after '/', most ASCII chars are allowed. // We don't sanitize in this case. @@ -37,8 +35,8 @@ FX_STRSIZE FindWebLinkEnding(const WideString& str, if (result.has_value()) { end = result.value(); if (end > start + 1) { // Has content inside brackets. - FX_STRSIZE len = str.GetLength(); - FX_STRSIZE off = end + 1; + size_t len = str.GetLength(); + size_t off = end + 1; if (off < len && str[off] == L':') { off++; while (off < len && str[off] >= L'0' && str[off] <= L'9') @@ -69,9 +67,9 @@ FX_STRSIZE FindWebLinkEnding(const WideString& str, // |end| if characters were removed. void TrimBackwardsToChar(const WideString& str, wchar_t charToFind, - FX_STRSIZE start, - FX_STRSIZE* end) { - for (FX_STRSIZE pos = *end; pos >= start; pos--) { + size_t start, + size_t* end) { + for (size_t pos = *end; pos >= start; pos--) { if (str[pos] == charToFind) { *end = pos - 1; break; @@ -83,10 +81,10 @@ void TrimBackwardsToChar(const WideString& str, // |start| and |end| in |str|. Matches a closing bracket or quote for each // opening character and, if present, removes everything afterwards. Returns the // new end position for the string. -FX_STRSIZE TrimExternalBracketsFromWebLink(const WideString& str, - FX_STRSIZE start, - FX_STRSIZE end) { - for (FX_STRSIZE pos = 0; pos < start; pos++) { +size_t TrimExternalBracketsFromWebLink(const WideString& str, + size_t start, + size_t end) { + for (size_t pos = 0; pos < start; pos++) { if (str[pos] == '(') { TrimBackwardsToChar(str, ')', start, &end); } else if (str[pos] == '[') { @@ -191,25 +189,25 @@ bool CPDF_LinkExtract::CheckWebLink(WideString* strBeCheck, int32_t* nStart, int32_t* nCount) { static const wchar_t kHttpScheme[] = L"http"; - static const FX_STRSIZE kHttpSchemeLen = FXSYS_len(kHttpScheme); + static const size_t kHttpSchemeLen = FXSYS_len(kHttpScheme); static const wchar_t kWWWAddrStart[] = L"www."; - static const FX_STRSIZE kWWWAddrStartLen = FXSYS_len(kWWWAddrStart); + static const size_t kWWWAddrStartLen = FXSYS_len(kWWWAddrStart); WideString str = *strBeCheck; str.MakeLower(); - FX_STRSIZE len = str.GetLength(); + size_t len = str.GetLength(); // First, try to find the scheme. auto start = str.Find(kHttpScheme); if (start.has_value()) { - FX_STRSIZE off = start.value() + kHttpSchemeLen; // move after "http". + size_t off = start.value() + kHttpSchemeLen; // move after "http". if (len > off + 4) { // At least "://<char>" follows. if (str[off] == L's') // "https" scheme is accepted. off++; if (str[off] == L':' && str[off + 1] == L'/' && str[off + 2] == L'/') { off += 3; - FX_STRSIZE end = TrimExternalBracketsFromWebLink(str, start.value(), - str.GetLength() - 1); + size_t end = TrimExternalBracketsFromWebLink(str, start.value(), + str.GetLength() - 1); end = FindWebLinkEnding(str, off, end); if (end > off) { // Non-empty host name. *nStart = start.value(); @@ -224,8 +222,8 @@ bool CPDF_LinkExtract::CheckWebLink(WideString* strBeCheck, // When there is no scheme, try to find url starting with "www.". start = str.Find(kWWWAddrStart); if (start.has_value() && len > start.value() + kWWWAddrStartLen) { - FX_STRSIZE end = TrimExternalBracketsFromWebLink(str, start.value(), - str.GetLength() - 1); + size_t end = TrimExternalBracketsFromWebLink(str, start.value(), + str.GetLength() - 1); end = FindWebLinkEnding(str, start.value(), end); if (end > start.value() + kWWWAddrStartLen) { *nStart = start.value(); @@ -244,8 +242,8 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) { return false; // Check the local part. - FX_STRSIZE pPos = aPos.value(); // Used to track the position of '@' or '.'. - for (FX_STRSIZE i = aPos.value(); i > 0; i--) { + size_t pPos = aPos.value(); // Used to track the position of '@' or '.'. + for (size_t i = aPos.value(); i > 0; i--) { wchar_t ch = (*str)[i - 1]; if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) continue; @@ -257,7 +255,7 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) { } // End extracting for other invalid chars, '.' at the beginning, or // consecutive '.'. - FX_STRSIZE removed_len = i == pPos ? i + 1 : i; + size_t removed_len = i == pPos ? i + 1 : i; *str = str->Right(str->GetLength() - removed_len); break; } @@ -279,16 +277,16 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) { return false; // Validate all other chars in domain name. - FX_STRSIZE nLen = str->GetLength(); + size_t nLen = str->GetLength(); pPos = 0; // Used to track the position of '.'. - for (FX_STRSIZE i = aPos.value() + 1; i < nLen; i++) { + for (size_t i = aPos.value() + 1; i < nLen; i++) { wchar_t wch = (*str)[i]; if (wch == L'-' || FXSYS_iswalnum(wch)) continue; if (wch != L'.' || i == pPos + 1) { // Domain name should end before invalid char. - FX_STRSIZE host_end = i == pPos + 1 ? i - 2 : i - 1; + size_t host_end = i == pPos + 1 ? i - 2 : i - 1; if (pPos > 0 && host_end - aPos.value() >= 3) { // Trim the ending invalid chars if there is at least one '.' and name. *str = str->Left(host_end + 1); diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index f73793cd72..fd33fb2f2f 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp @@ -66,7 +66,7 @@ float CalculateBaseSpace(const CPDF_TextObject* pTextObj, return baseSpace; } -FX_STRSIZE Unicode_GetNormalization(wchar_t wch, wchar_t* pDst) { +size_t Unicode_GetNormalization(wchar_t wch, wchar_t* pDst) { wch = wch & 0xFFFF; wchar_t wFind = g_UnicodeData_Normalization[wch]; if (!wFind) { @@ -93,7 +93,7 @@ FX_STRSIZE Unicode_GetNormalization(wchar_t wch, wchar_t* pDst) { while (n--) *pDst++ = *pMap++; } - return (FX_STRSIZE)wFind; + return static_cast<size_t>(wFind); } float MaskPercentFilled(const std::vector<bool>& mask, @@ -649,11 +649,11 @@ void CPDF_TextPage::AddCharInfoByLRDirection(wchar_t wChar, info.m_Index = m_TextBuf.GetLength(); if (wChar >= 0xFB00 && wChar <= 0xFB06) { wchar_t* pDst = nullptr; - FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); + size_t nCount = Unicode_GetNormalization(wChar, pDst); if (nCount >= 1) { pDst = FX_Alloc(wchar_t, nCount); Unicode_GetNormalization(wChar, pDst); - for (FX_STRSIZE nIndex = 0; nIndex < nCount; nIndex++) { + for (size_t nIndex = 0; nIndex < nCount; nIndex++) { PAGECHAR_INFO info2 = info; info2.m_Unicode = pDst[nIndex]; info2.m_Flag = FPDFTEXT_CHAR_PIECE; @@ -679,11 +679,11 @@ void CPDF_TextPage::AddCharInfoByRLDirection(wchar_t wChar, info.m_Index = m_TextBuf.GetLength(); wChar = FX_GetMirrorChar(wChar); wchar_t* pDst = nullptr; - FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst); + size_t nCount = Unicode_GetNormalization(wChar, pDst); if (nCount >= 1) { pDst = FX_Alloc(wchar_t, nCount); Unicode_GetNormalization(wChar, pDst); - for (FX_STRSIZE nIndex = 0; nIndex < nCount; nIndex++) { + for (size_t nIndex = 0; nIndex < nCount; nIndex++) { PAGECHAR_INFO info2 = info; info2.m_Unicode = pDst[nIndex]; info2.m_Flag = FPDFTEXT_CHAR_PIECE; @@ -704,7 +704,7 @@ void CPDF_TextPage::CloseTempLine() { WideString str = m_TempTextBuf.MakeString(); bool bPrevSpace = false; - for (FX_STRSIZE i = 0; i < str.GetLength(); i++) { + for (size_t i = 0; i < str.GetLength(); i++) { if (str[i] != ' ') { bPrevSpace = false; continue; @@ -838,13 +838,12 @@ FPDFText_MarkedContent CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { return FPDFText_MarkedContent::Done; } - FX_STRSIZE nItems = actText.GetLength(); - if (nItems < 1) + if (actText.IsEmpty()) return FPDFText_MarkedContent::Pass; CPDF_Font* pFont = pTextObj->GetFont(); bExist = false; - for (FX_STRSIZE i = 0; i < nItems; i++) { + for (size_t i = 0; i < actText.GetLength(); i++) { if (pFont->CharCodeFromUnicode(actText[i]) != CPDF_Font::kInvalidCharCode) { bExist = true; break; @@ -854,7 +853,7 @@ FPDFText_MarkedContent CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { return FPDFText_MarkedContent::Pass; bExist = false; - for (FX_STRSIZE i = 0; i < nItems; i++) { + for (size_t i = 0; i < actText.GetLength(); i++) { wchar_t wChar = actText[i]; if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) { bExist = true; @@ -883,15 +882,14 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { if (pDict) actText = pDict->GetUnicodeTextFor("ActualText"); } - FX_STRSIZE nItems = actText.GetLength(); - if (nItems < 1) + if (actText.IsEmpty()) return; CPDF_Font* pFont = pTextObj->GetFont(); CFX_Matrix matrix = pTextObj->GetTextMatrix(); matrix.Concat(Obj.m_formMatrix); - for (FX_STRSIZE k = 0; k < nItems; k++) { + for (size_t k = 0; k < actText.GetLength(); k++) { wchar_t wChar = actText[k]; if (wChar <= 0x80 && !isprint(wChar)) wChar = 0x20; diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp index f00b8a9f4d..9f9be202d6 100644 --- a/core/fpdftext/cpdf_textpagefind.cpp +++ b/core/fpdftext/cpdf_textpagefind.cpp @@ -81,7 +81,7 @@ int CPDF_TextPageFind::GetCharIndex(int index) const { bool CPDF_TextPageFind::FindFirst(const WideString& findwhat, int flags, - pdfium::Optional<FX_STRSIZE> startPos) { + pdfium::Optional<size_t> startPos) { if (!m_pTextPage) return false; if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) @@ -94,7 +94,7 @@ bool CPDF_TextPageFind::FindFirst(const WideString& findwhat, m_IsFind = false; return true; } - FX_STRSIZE len = findwhatStr.GetLength(); + size_t len = findwhatStr.GetLength(); if (!m_bMatchCase) { findwhatStr.MakeLower(); m_strText.MakeLower(); @@ -102,25 +102,24 @@ bool CPDF_TextPageFind::FindFirst(const WideString& findwhat, m_bMatchWholeWord = !!(flags & FPDFTEXT_MATCHWHOLEWORD); m_findNextStart = startPos; if (!startPos.has_value()) { - if (m_strText.GetLength() > 0) + if (!m_strText.IsEmpty()) m_findPreStart = m_strText.GetLength() - 1; } else { m_findPreStart = startPos; } m_csFindWhatArray.clear(); - FX_STRSIZE i = 0; - while (i < len) { + size_t i = 0; + for (i = 0; i < len; ++i) if (findwhatStr[i] != ' ') break; - i++; - } if (i < len) ExtractFindWhat(findwhatStr); else m_csFindWhatArray.push_back(findwhatStr); if (m_csFindWhatArray.empty()) return false; + m_IsFind = true; m_resStart = 0; m_resEnd = -1; @@ -137,14 +136,14 @@ bool CPDF_TextPageFind::FindNext() { m_IsFind = false; return m_IsFind; } - FX_STRSIZE strLen = m_strText.GetLength(); + size_t strLen = m_strText.GetLength(); if (m_findNextStart.value() > strLen - 1) { m_IsFind = false; return m_IsFind; } int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray); - pdfium::Optional<FX_STRSIZE> nResultPos = 0; - FX_STRSIZE nStartPos = m_findNextStart.value(); + pdfium::Optional<size_t> nResultPos = 0; + size_t nStartPos = m_findNextStart.value(); bool bSpaceStart = false; for (int iWord = 0; iWord < nCount; iWord++) { WideString csWord = m_csFindWhatArray[iWord]; @@ -162,18 +161,17 @@ bool CPDF_TextPageFind::FindNext() { } continue; } - FX_STRSIZE endIndex; nResultPos = m_strText.Find(csWord.c_str(), nStartPos); if (!nResultPos.has_value()) { m_IsFind = false; return m_IsFind; } - endIndex = nResultPos.value() + csWord.GetLength() - 1; + size_t endIndex = nResultPos.value() + csWord.GetLength() - 1; if (iWord == 0) m_resStart = nResultPos.value(); bool bMatch = true; if (iWord != 0 && !bSpaceStart) { - FX_STRSIZE PreResEndPos = nStartPos; + size_t PreResEndPos = nStartPos; int curChar = csWord[0]; WideString lastWord = m_csFindWhatArray[iWord - 1]; int lastChar = lastWord[lastWord.GetLength() - 1]; @@ -182,7 +180,7 @@ bool CPDF_TextPageFind::FindNext() { IsIgnoreSpaceCharacter(curChar))) { bMatch = false; } - for (FX_STRSIZE d = PreResEndPos; d < nResultPos.value(); d++) { + for (size_t d = PreResEndPos; d < nResultPos.value(); d++) { wchar_t strInsert = m_strText[d]; if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) { @@ -238,20 +236,21 @@ bool CPDF_TextPageFind::FindPrev() { return m_IsFind; } CPDF_TextPageFind findEngine(m_pTextPage.Get()); - bool ret = findEngine.FindFirst(m_findWhat, m_flags, - pdfium::Optional<FX_STRSIZE>(0)); + bool ret = + findEngine.FindFirst(m_findWhat, m_flags, pdfium::Optional<size_t>(0)); if (!ret) { m_IsFind = false; return m_IsFind; } - int order = -1, MatchedCount = 0; + int order = -1; + int MatchedCount = 0; while (ret) { ret = findEngine.FindNext(); if (ret) { int order1 = findEngine.GetCurOrder(); int MatchedCount1 = findEngine.GetMatchedCount(); - if (static_cast<FX_STRSIZE>((order1 + MatchedCount1)) > - m_findPreStart.value() + 1) + int temp = order1 + MatchedCount1; + if (temp < 0 || static_cast<size_t>(temp) > m_findPreStart.value() + 1) break; order = order1; MatchedCount = MatchedCount1; @@ -292,7 +291,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const WideString& findwhat) { break; } } - FX_STRSIZE pos = 0; + size_t pos = 0; while (pos < csWord.GetLength()) { WideString curStr = csWord.Mid(pos, 1); wchar_t curChar = csWord[pos]; @@ -321,13 +320,13 @@ void CPDF_TextPageFind::ExtractFindWhat(const WideString& findwhat) { } bool CPDF_TextPageFind::IsMatchWholeWord(const WideString& csPageText, - FX_STRSIZE startPos, - FX_STRSIZE endPos) { + size_t startPos, + size_t endPos) { if (startPos > endPos) return false; wchar_t char_left = 0; wchar_t char_right = 0; - FX_STRSIZE char_count = endPos - startPos + 1; + size_t char_count = endPos - startPos + 1; if (char_count == 0) return false; if (char_count == 1 && csPageText[startPos] > 255) diff --git a/core/fpdftext/cpdf_textpagefind.h b/core/fpdftext/cpdf_textpagefind.h index face4e46b4..574f05ed0d 100644 --- a/core/fpdftext/cpdf_textpagefind.h +++ b/core/fpdftext/cpdf_textpagefind.h @@ -24,7 +24,7 @@ class CPDF_TextPageFind { bool FindFirst(const WideString& findwhat, int flags, - pdfium::Optional<FX_STRSIZE> startPos); + pdfium::Optional<size_t> startPos); bool FindNext(); bool FindPrev(); int GetCurOrder() const; @@ -33,8 +33,8 @@ class CPDF_TextPageFind { protected: void ExtractFindWhat(const WideString& findwhat); bool IsMatchWholeWord(const WideString& csPageText, - FX_STRSIZE startPos, - FX_STRSIZE endPos); + size_t startPos, + size_t endPos); bool ExtractSubString(WideString& rString, const wchar_t* lpszFullString, int iSubString, @@ -48,8 +48,8 @@ class CPDF_TextPageFind { WideString m_findWhat; int m_flags; std::vector<WideString> m_csFindWhatArray; - pdfium::Optional<FX_STRSIZE> m_findNextStart; - pdfium::Optional<FX_STRSIZE> m_findPreStart; + pdfium::Optional<size_t> m_findNextStart; + pdfium::Optional<size_t> m_findPreStart; bool m_bMatchCase; bool m_bMatchWholeWord; int m_resStart; |