diff options
Diffstat (limited to 'core/fpdftext')
-rw-r--r-- | core/fpdftext/cpdf_linkextract.cpp | 74 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpagefind.cpp | 27 |
2 files changed, 52 insertions, 49 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp index fb228ec832..cfa9dbba7f 100644 --- a/core/fpdftext/cpdf_linkextract.cpp +++ b/core/fpdftext/cpdf_linkextract.cpp @@ -22,7 +22,7 @@ namespace { FX_STRSIZE FindWebLinkEnding(const CFX_WideString& str, FX_STRSIZE start, FX_STRSIZE end) { - if (str.Find(L'/', start) != FX_STRNPOS) { + if (str.Contains(L'/', start)) { // When there is a path and query after '/', most ASCII chars are allowed. // We don't sanitize in this case. return end; @@ -33,16 +33,20 @@ FX_STRSIZE FindWebLinkEnding(const CFX_WideString& str, if (str[start] == L'[') { // IPv6 reference. // Find the end of the reference. - end = str.Find(L']', start + 1); - if (end != -1 && end > start + 1) { // Has content inside brackets. - FX_STRSIZE len = str.GetLength(); - FX_STRSIZE off = end + 1; - if (off < len && str[off] == L':') { - off++; - while (off < len && str[off] >= L'0' && str[off] <= L'9') + auto result = str.Find(L']', start + 1); + if (result.has_value()) { + end = result.value(); + if (end > start + 1) { // Has content inside brackets. + FX_STRSIZE len = str.GetLength(); + FX_STRSIZE off = end + 1; + if (off < len && str[off] == L':') { off++; - if (off > end + 2 && off <= len) // At least one digit in port number. - end = off - 1; // |off| is offset of the first invalid char. + while (off < len && str[off] >= L'0' && str[off] <= L'9') + off++; + if (off > end + 2 && + off <= len) // At least one digit in port number. + end = off - 1; // |off| is offset of the first invalid char. + } } } return end; @@ -196,20 +200,20 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck, FX_STRSIZE len = str.GetLength(); // First, try to find the scheme. - FX_STRSIZE start = str.Find(kHttpScheme); - if (start != FX_STRNPOS) { - FX_STRSIZE off = start + kHttpSchemeLen; // move after "http". + auto start = str.Find(kHttpScheme); + if (start.has_value()) { + FX_STRSIZE off = start.value() + kHttpSchemeLen; // move after "http". if (len > off + 4) { // At least "://<char>" follows. if (str[off] == L's') // "https" scheme is accepted. off++; if (str[off] == L':' && str[off + 1] == L'/' && str[off + 2] == L'/') { off += 3; - FX_STRSIZE end = - TrimExternalBracketsFromWebLink(str, start, str.GetLength() - 1); + FX_STRSIZE end = TrimExternalBracketsFromWebLink(str, start.value(), + str.GetLength() - 1); end = FindWebLinkEnding(str, off, end); if (end > off) { // Non-empty host name. - *nStart = start; - *nCount = end - start + 1; + *nStart = start.value(); + *nCount = end - start.value() + 1; *strBeCheck = strBeCheck->Mid(*nStart, *nCount); return true; } @@ -219,13 +223,13 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck, // When there is no scheme, try to find url starting with "www.". start = str.Find(kWWWAddrStart); - if (start != FX_STRNPOS && len > start + kWWWAddrStartLen) { - FX_STRSIZE end = - TrimExternalBracketsFromWebLink(str, start, str.GetLength() - 1); - end = FindWebLinkEnding(str, start, end); - if (end > start + kWWWAddrStartLen) { - *nStart = start; - *nCount = end - start + 1; + if (start.has_value() && len > start.value() + kWWWAddrStartLen) { + FX_STRSIZE end = TrimExternalBracketsFromWebLink(str, start.value(), + str.GetLength() - 1); + end = FindWebLinkEnding(str, start.value(), end); + if (end > start.value() + kWWWAddrStartLen) { + *nStart = start.value(); + *nCount = end - start.value() + 1; *strBeCheck = L"http://" + strBeCheck->Mid(*nStart, *nCount); return true; } @@ -234,20 +238,20 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck, } bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) { - FX_STRSIZE aPos = str->Find(L'@'); + auto aPos = str->Find(L'@'); // Invalid when no '@' or when starts/ends with '@'. - if (aPos == FX_STRNPOS || aPos == 0 || aPos == str->GetLength() - 1) + if (!aPos.has_value() || aPos.value() == 0 || aPos == str->GetLength() - 1) return false; // Check the local part. - int pPos = aPos; // Used to track the position of '@' or '.'. - for (int i = aPos - 1; i >= 0; i--) { + int pPos = aPos.value(); // Used to track the position of '@' or '.'. + for (int i = aPos.value() - 1; i >= 0; i--) { wchar_t ch = (*str)[i]; if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch)) continue; if (ch != L'.' || i == pPos - 1 || i == 0) { - if (i == aPos - 1) { + if (i == aPos.value() - 1) { // There is '.' or invalid char before '@'. return false; } @@ -263,21 +267,21 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) { // Check the domain name part. aPos = str->Find(L'@'); - if (aPos < 1 || aPos == FX_STRNPOS) + if (!aPos.has_value() || aPos.value() == 0) return false; str->TrimRight(L'.'); // At least one '.' in domain name, but not at the beginning. // TODO(weili): RFC5322 allows domain names to be a local name without '.'. // Check whether we should remove this check. - FX_STRSIZE ePos = str->Find(L'.', aPos + 1); - if (ePos == FX_STRNPOS || ePos == aPos + 1) + auto ePos = str->Find(L'.', aPos.value() + 1); + if (!ePos.has_value() || ePos.value() == aPos.value() + 1) return false; // Validate all other chars in domain name. int nLen = str->GetLength(); pPos = 0; // Used to track the position of '.'. - for (int i = aPos + 1; i < nLen; i++) { + for (int i = aPos.value() + 1; i < nLen; i++) { wchar_t wch = (*str)[i]; if (wch == L'-' || FXSYS_iswalnum(wch)) continue; @@ -285,7 +289,7 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) { if (wch != L'.' || i == pPos + 1) { // Domain name should end before invalid char. int host_end = i == pPos + 1 ? i - 2 : i - 1; - if (pPos > 0 && host_end - aPos >= 3) { + if (pPos > 0 && host_end - aPos.value() >= 3) { // Trim the ending invalid chars if there is at least one '.' and name. *str = str->Left(host_end + 1); break; @@ -295,7 +299,7 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) { pPos = i; } - if (str->Find(L"mailto:") == FX_STRNPOS) + if (!str->Contains(L"mailto:")) *str = L"mailto:" + *str; return true; diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp index 55f940cad7..fd6e3a0d00 100644 --- a/core/fpdftext/cpdf_textpagefind.cpp +++ b/core/fpdftext/cpdf_textpagefind.cpp @@ -142,9 +142,8 @@ bool CPDF_TextPageFind::FindNext() { return m_IsFind; } int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray); - int nResultPos = 0; - int nStartPos = 0; - nStartPos = m_findNextStart; + pdfium::Optional<FX_STRSIZE> nResultPos = 0; + int nStartPos = m_findNextStart; bool bSpaceStart = false; for (int iWord = 0; iWord < nCount; iWord++) { CFX_WideString csWord = m_csFindWhatArray[iWord]; @@ -164,25 +163,25 @@ bool CPDF_TextPageFind::FindNext() { } int endIndex; nResultPos = m_strText.Find(csWord.c_str(), nStartPos); - if (nResultPos == FX_STRNPOS) { + if (!nResultPos.has_value()) { m_IsFind = false; return m_IsFind; } - endIndex = nResultPos + csWord.GetLength() - 1; + endIndex = nResultPos.value() + csWord.GetLength() - 1; if (iWord == 0) - m_resStart = nResultPos; + m_resStart = nResultPos.value(); bool bMatch = true; if (iWord != 0 && !bSpaceStart) { int PreResEndPos = nStartPos; int curChar = csWord[0]; CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; int lastChar = lastWord[lastWord.GetLength() - 1]; - if (nStartPos == nResultPos && + if (nStartPos == nResultPos.value() && !(IsIgnoreSpaceCharacter(lastChar) || IsIgnoreSpaceCharacter(curChar))) { bMatch = false; } - for (int d = PreResEndPos; d < nResultPos; d++) { + for (int d = PreResEndPos; d < nResultPos.value(); d++) { wchar_t strInsert = m_strText[d]; if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) { @@ -191,19 +190,19 @@ bool CPDF_TextPageFind::FindNext() { } } } else if (bSpaceStart) { - if (nResultPos > 0) { - wchar_t strInsert = m_strText[nResultPos - 1]; + if (nResultPos.value() > 0) { + wchar_t strInsert = m_strText[nResultPos.value() - 1]; if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR && strInsert != TEXT_RETURN_CHAR && strInsert != 160) { bMatch = false; - m_resStart = nResultPos; + m_resStart = nResultPos.value(); } else { - m_resStart = nResultPos - 1; + m_resStart = nResultPos.value() - 1; } } } if (m_bMatchWholeWord && bMatch) { - bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex); + bMatch = IsMatchWholeWord(m_strText, nResultPos.value(), endIndex); } nStartPos = endIndex + 1; if (!bMatch) { @@ -214,7 +213,7 @@ bool CPDF_TextPageFind::FindNext() { nStartPos = m_resStart + m_csFindWhatArray[0].GetLength(); } } - m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1; + m_resEnd = nResultPos.value() + m_csFindWhatArray.back().GetLength() - 1; m_IsFind = true; int resStart = GetCharIndex(m_resStart); int resEnd = GetCharIndex(m_resEnd); |