summaryrefslogtreecommitdiff
path: root/core/fpdftext
diff options
context:
space:
mode:
authorRyan Harrison <rharrison@chromium.org>2017-08-23 10:39:35 -0400
committerChromium commit bot <commit-bot@chromium.org>2017-08-23 15:11:19 +0000
commit12db7515f17228798d1aa38fce0fee3e7d2d36b6 (patch)
treee291daf9e6a88ba0248670b9f1ba3a555f052538 /core/fpdftext
parent3bb0a34cc75abe49a59c6390353957bbb5c5ab38 (diff)
downloadpdfium-12db7515f17228798d1aa38fce0fee3e7d2d36b6.tar.xz
Convert string Find methods to return an Optional
The Find and ReverseFind methods for WideString, WideStringC, ByteString, and ByteStringC have been converted from returning a raw FX_STRSIZE to returning Optional<FX_STRSIZE>, so that success/failure can be indicated without using FX_STRNPOS. This allows for removing FX_STRNPOS and by association makes the conversion of FX_STRSIZE to size_t easier, since it forces the return value of Find to be checked explicitly and takes the error value out of the range of FX_STRSIZE. New Contains methods have been added for cases where the success or failure is all the call site to Find cared about, and the actual position was ignored. BUG=pdfium:828 Change-Id: Id827e508c8660affa68cc08a13d96121369364b7 Reviewed-on: https://pdfium-review.googlesource.com/11350 Commit-Queue: Ryan Harrison <rharrison@chromium.org> Reviewed-by: dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'core/fpdftext')
-rw-r--r--core/fpdftext/cpdf_linkextract.cpp74
-rw-r--r--core/fpdftext/cpdf_textpagefind.cpp27
2 files changed, 52 insertions, 49 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index fb228ec832..cfa9dbba7f 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp
@@ -22,7 +22,7 @@ namespace {
FX_STRSIZE FindWebLinkEnding(const CFX_WideString& str,
FX_STRSIZE start,
FX_STRSIZE end) {
- if (str.Find(L'/', start) != FX_STRNPOS) {
+ if (str.Contains(L'/', start)) {
// When there is a path and query after '/', most ASCII chars are allowed.
// We don't sanitize in this case.
return end;
@@ -33,16 +33,20 @@ FX_STRSIZE FindWebLinkEnding(const CFX_WideString& str,
if (str[start] == L'[') {
// IPv6 reference.
// Find the end of the reference.
- end = str.Find(L']', start + 1);
- if (end != -1 && end > start + 1) { // Has content inside brackets.
- FX_STRSIZE len = str.GetLength();
- FX_STRSIZE off = end + 1;
- if (off < len && str[off] == L':') {
- off++;
- while (off < len && str[off] >= L'0' && str[off] <= L'9')
+ auto result = str.Find(L']', start + 1);
+ if (result.has_value()) {
+ end = result.value();
+ if (end > start + 1) { // Has content inside brackets.
+ FX_STRSIZE len = str.GetLength();
+ FX_STRSIZE off = end + 1;
+ if (off < len && str[off] == L':') {
off++;
- if (off > end + 2 && off <= len) // At least one digit in port number.
- end = off - 1; // |off| is offset of the first invalid char.
+ while (off < len && str[off] >= L'0' && str[off] <= L'9')
+ off++;
+ if (off > end + 2 &&
+ off <= len) // At least one digit in port number.
+ end = off - 1; // |off| is offset of the first invalid char.
+ }
}
}
return end;
@@ -196,20 +200,20 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck,
FX_STRSIZE len = str.GetLength();
// First, try to find the scheme.
- FX_STRSIZE start = str.Find(kHttpScheme);
- if (start != FX_STRNPOS) {
- FX_STRSIZE off = start + kHttpSchemeLen; // move after "http".
+ auto start = str.Find(kHttpScheme);
+ if (start.has_value()) {
+ FX_STRSIZE off = start.value() + kHttpSchemeLen; // move after "http".
if (len > off + 4) { // At least "://<char>" follows.
if (str[off] == L's') // "https" scheme is accepted.
off++;
if (str[off] == L':' && str[off + 1] == L'/' && str[off + 2] == L'/') {
off += 3;
- FX_STRSIZE end =
- TrimExternalBracketsFromWebLink(str, start, str.GetLength() - 1);
+ FX_STRSIZE end = TrimExternalBracketsFromWebLink(str, start.value(),
+ str.GetLength() - 1);
end = FindWebLinkEnding(str, off, end);
if (end > off) { // Non-empty host name.
- *nStart = start;
- *nCount = end - start + 1;
+ *nStart = start.value();
+ *nCount = end - start.value() + 1;
*strBeCheck = strBeCheck->Mid(*nStart, *nCount);
return true;
}
@@ -219,13 +223,13 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck,
// When there is no scheme, try to find url starting with "www.".
start = str.Find(kWWWAddrStart);
- if (start != FX_STRNPOS && len > start + kWWWAddrStartLen) {
- FX_STRSIZE end =
- TrimExternalBracketsFromWebLink(str, start, str.GetLength() - 1);
- end = FindWebLinkEnding(str, start, end);
- if (end > start + kWWWAddrStartLen) {
- *nStart = start;
- *nCount = end - start + 1;
+ if (start.has_value() && len > start.value() + kWWWAddrStartLen) {
+ FX_STRSIZE end = TrimExternalBracketsFromWebLink(str, start.value(),
+ str.GetLength() - 1);
+ end = FindWebLinkEnding(str, start.value(), end);
+ if (end > start.value() + kWWWAddrStartLen) {
+ *nStart = start.value();
+ *nCount = end - start.value() + 1;
*strBeCheck = L"http://" + strBeCheck->Mid(*nStart, *nCount);
return true;
}
@@ -234,20 +238,20 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck,
}
bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) {
- FX_STRSIZE aPos = str->Find(L'@');
+ auto aPos = str->Find(L'@');
// Invalid when no '@' or when starts/ends with '@'.
- if (aPos == FX_STRNPOS || aPos == 0 || aPos == str->GetLength() - 1)
+ if (!aPos.has_value() || aPos.value() == 0 || aPos == str->GetLength() - 1)
return false;
// Check the local part.
- int pPos = aPos; // Used to track the position of '@' or '.'.
- for (int i = aPos - 1; i >= 0; i--) {
+ int pPos = aPos.value(); // Used to track the position of '@' or '.'.
+ for (int i = aPos.value() - 1; i >= 0; i--) {
wchar_t ch = (*str)[i];
if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch))
continue;
if (ch != L'.' || i == pPos - 1 || i == 0) {
- if (i == aPos - 1) {
+ if (i == aPos.value() - 1) {
// There is '.' or invalid char before '@'.
return false;
}
@@ -263,21 +267,21 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) {
// Check the domain name part.
aPos = str->Find(L'@');
- if (aPos < 1 || aPos == FX_STRNPOS)
+ if (!aPos.has_value() || aPos.value() == 0)
return false;
str->TrimRight(L'.');
// At least one '.' in domain name, but not at the beginning.
// TODO(weili): RFC5322 allows domain names to be a local name without '.'.
// Check whether we should remove this check.
- FX_STRSIZE ePos = str->Find(L'.', aPos + 1);
- if (ePos == FX_STRNPOS || ePos == aPos + 1)
+ auto ePos = str->Find(L'.', aPos.value() + 1);
+ if (!ePos.has_value() || ePos.value() == aPos.value() + 1)
return false;
// Validate all other chars in domain name.
int nLen = str->GetLength();
pPos = 0; // Used to track the position of '.'.
- for (int i = aPos + 1; i < nLen; i++) {
+ for (int i = aPos.value() + 1; i < nLen; i++) {
wchar_t wch = (*str)[i];
if (wch == L'-' || FXSYS_iswalnum(wch))
continue;
@@ -285,7 +289,7 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) {
if (wch != L'.' || i == pPos + 1) {
// Domain name should end before invalid char.
int host_end = i == pPos + 1 ? i - 2 : i - 1;
- if (pPos > 0 && host_end - aPos >= 3) {
+ if (pPos > 0 && host_end - aPos.value() >= 3) {
// Trim the ending invalid chars if there is at least one '.' and name.
*str = str->Left(host_end + 1);
break;
@@ -295,7 +299,7 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) {
pPos = i;
}
- if (str->Find(L"mailto:") == FX_STRNPOS)
+ if (!str->Contains(L"mailto:"))
*str = L"mailto:" + *str;
return true;
diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp
index 55f940cad7..fd6e3a0d00 100644
--- a/core/fpdftext/cpdf_textpagefind.cpp
+++ b/core/fpdftext/cpdf_textpagefind.cpp
@@ -142,9 +142,8 @@ bool CPDF_TextPageFind::FindNext() {
return m_IsFind;
}
int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray);
- int nResultPos = 0;
- int nStartPos = 0;
- nStartPos = m_findNextStart;
+ pdfium::Optional<FX_STRSIZE> nResultPos = 0;
+ int nStartPos = m_findNextStart;
bool bSpaceStart = false;
for (int iWord = 0; iWord < nCount; iWord++) {
CFX_WideString csWord = m_csFindWhatArray[iWord];
@@ -164,25 +163,25 @@ bool CPDF_TextPageFind::FindNext() {
}
int endIndex;
nResultPos = m_strText.Find(csWord.c_str(), nStartPos);
- if (nResultPos == FX_STRNPOS) {
+ if (!nResultPos.has_value()) {
m_IsFind = false;
return m_IsFind;
}
- endIndex = nResultPos + csWord.GetLength() - 1;
+ endIndex = nResultPos.value() + csWord.GetLength() - 1;
if (iWord == 0)
- m_resStart = nResultPos;
+ m_resStart = nResultPos.value();
bool bMatch = true;
if (iWord != 0 && !bSpaceStart) {
int PreResEndPos = nStartPos;
int curChar = csWord[0];
CFX_WideString lastWord = m_csFindWhatArray[iWord - 1];
int lastChar = lastWord[lastWord.GetLength() - 1];
- if (nStartPos == nResultPos &&
+ if (nStartPos == nResultPos.value() &&
!(IsIgnoreSpaceCharacter(lastChar) ||
IsIgnoreSpaceCharacter(curChar))) {
bMatch = false;
}
- for (int d = PreResEndPos; d < nResultPos; d++) {
+ for (int d = PreResEndPos; d < nResultPos.value(); d++) {
wchar_t strInsert = m_strText[d];
if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
@@ -191,19 +190,19 @@ bool CPDF_TextPageFind::FindNext() {
}
}
} else if (bSpaceStart) {
- if (nResultPos > 0) {
- wchar_t strInsert = m_strText[nResultPos - 1];
+ if (nResultPos.value() > 0) {
+ wchar_t strInsert = m_strText[nResultPos.value() - 1];
if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
bMatch = false;
- m_resStart = nResultPos;
+ m_resStart = nResultPos.value();
} else {
- m_resStart = nResultPos - 1;
+ m_resStart = nResultPos.value() - 1;
}
}
}
if (m_bMatchWholeWord && bMatch) {
- bMatch = IsMatchWholeWord(m_strText, nResultPos, endIndex);
+ bMatch = IsMatchWholeWord(m_strText, nResultPos.value(), endIndex);
}
nStartPos = endIndex + 1;
if (!bMatch) {
@@ -214,7 +213,7 @@ bool CPDF_TextPageFind::FindNext() {
nStartPos = m_resStart + m_csFindWhatArray[0].GetLength();
}
}
- m_resEnd = nResultPos + m_csFindWhatArray.back().GetLength() - 1;
+ m_resEnd = nResultPos.value() + m_csFindWhatArray.back().GetLength() - 1;
m_IsFind = true;
int resStart = GetCharIndex(m_resStart);
int resEnd = GetCharIndex(m_resEnd);