summary | refs | log | tree | commit | diff
path: root/core/fpdftext/cpdf_linkextract.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext/cpdf_linkextract.cpp')
-rw-r--r-- core/fpdftext/cpdf_linkextract.cpp 50
1 files changed, 24 insertions, 26 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index 91116711e6..3a38343721 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp
@@ -19,9 +19,7 @@ namespace {
// |end|. The purpose of this function is to separate url from the surrounding
// context characters, we do not intend to fully validate the url. |str|
// contains lower case characters only.
-FX_STRSIZE FindWebLinkEnding(const WideString& str,
- FX_STRSIZE start,
- FX_STRSIZE end) {
+size_t FindWebLinkEnding(const WideString& str, size_t start, size_t end) {
if (str.Contains(L'/', start)) {
// When there is a path and query after '/', most ASCII chars are allowed.
// We don't sanitize in this case.
@@ -37,8 +35,8 @@ FX_STRSIZE FindWebLinkEnding(const WideString& str,
if (result.has_value()) {
end = result.value();
if (end > start + 1) { // Has content inside brackets.
- FX_STRSIZE len = str.GetLength();
- FX_STRSIZE off = end + 1;
+ size_t len = str.GetLength();
+ size_t off = end + 1;
if (off < len && str[off] == L':') {
off++;
while (off < len && str[off] >= L'0' && str[off] <= L'9')
@@ -69,9 +67,9 @@ FX_STRSIZE FindWebLinkEnding(const WideString& str,
// |end| if characters were removed.
void TrimBackwardsToChar(const WideString& str,
wchar_t charToFind,
- FX_STRSIZE start,
- FX_STRSIZE* end) {
- for (FX_STRSIZE pos = *end; pos >= start; pos--) {
+ size_t start,
+ size_t* end) {
+ for (size_t pos = *end; pos >= start; pos--) {
if (str[pos] == charToFind) {
*end = pos - 1;
break;
@@ -83,10 +81,10 @@ void TrimBackwardsToChar(const WideString& str,
// |start| and |end| in |str|. Matches a closing bracket or quote for each
// opening character and, if present, removes everything afterwards. Returns the
// new end position for the string.
-FX_STRSIZE TrimExternalBracketsFromWebLink(const WideString& str,
- FX_STRSIZE start,
- FX_STRSIZE end) {
- for (FX_STRSIZE pos = 0; pos < start; pos++) {
+size_t TrimExternalBracketsFromWebLink(const WideString& str,
+ size_t start,
+ size_t end) {
+ for (size_t pos = 0; pos < start; pos++) {
if (str[pos] == '(') {
TrimBackwardsToChar(str, ')', start, &end);
} else if (str[pos] == '[') {
@@ -191,25 +189,25 @@ bool CPDF_LinkExtract::CheckWebLink(WideString* strBeCheck,
int32_t* nStart,
int32_t* nCount) {
static const wchar_t kHttpScheme[] = L"http";
- static const FX_STRSIZE kHttpSchemeLen = FXSYS_len(kHttpScheme);
+ static const size_t kHttpSchemeLen = FXSYS_len(kHttpScheme);
static const wchar_t kWWWAddrStart[] = L"www.";
- static const FX_STRSIZE kWWWAddrStartLen = FXSYS_len(kWWWAddrStart);
+ static const size_t kWWWAddrStartLen = FXSYS_len(kWWWAddrStart);
WideString str = *strBeCheck;
str.MakeLower();
- FX_STRSIZE len = str.GetLength();
+ size_t len = str.GetLength();
// First, try to find the scheme.
auto start = str.Find(kHttpScheme);
if (start.has_value()) {
- FX_STRSIZE off = start.value() + kHttpSchemeLen; // move after "http".
+ size_t off = start.value() + kHttpSchemeLen; // move after "http".
if (len > off + 4) { // At least "://<char>" follows.
if (str[off] == L's') // "https" scheme is accepted.
off++;
if (str[off] == L':' && str[off + 1] == L'/' && str[off + 2] == L'/') {
off += 3;
- FX_STRSIZE end = TrimExternalBracketsFromWebLink(str, start.value(),
- str.GetLength() - 1);
+ size_t end = TrimExternalBracketsFromWebLink(str, start.value(),
+ str.GetLength() - 1);
end = FindWebLinkEnding(str, off, end);
if (end > off) { // Non-empty host name.
*nStart = start.value();
@@ -224,8 +222,8 @@ bool CPDF_LinkExtract::CheckWebLink(WideString* strBeCheck,
// When there is no scheme, try to find url starting with "www.".
start = str.Find(kWWWAddrStart);
if (start.has_value() && len > start.value() + kWWWAddrStartLen) {
- FX_STRSIZE end = TrimExternalBracketsFromWebLink(str, start.value(),
- str.GetLength() - 1);
+ size_t end = TrimExternalBracketsFromWebLink(str, start.value(),
+ str.GetLength() - 1);
end = FindWebLinkEnding(str, start.value(), end);
if (end > start.value() + kWWWAddrStartLen) {
*nStart = start.value();
@@ -244,8 +242,8 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) {
return false;
// Check the local part.
- FX_STRSIZE pPos = aPos.value(); // Used to track the position of '@' or '.'.
- for (FX_STRSIZE i = aPos.value(); i > 0; i--) {
+ size_t pPos = aPos.value(); // Used to track the position of '@' or '.'.
+ for (size_t i = aPos.value(); i > 0; i--) {
wchar_t ch = (*str)[i - 1];
if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch))
continue;
@@ -257,7 +255,7 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) {
}
// End extracting for other invalid chars, '.' at the beginning, or
// consecutive '.'.
- FX_STRSIZE removed_len = i == pPos ? i + 1 : i;
+ size_t removed_len = i == pPos ? i + 1 : i;
*str = str->Right(str->GetLength() - removed_len);
break;
}
@@ -279,16 +277,16 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) {
return false;
// Validate all other chars in domain name.
- FX_STRSIZE nLen = str->GetLength();
+ size_t nLen = str->GetLength();
pPos = 0; // Used to track the position of '.'.
- for (FX_STRSIZE i = aPos.value() + 1; i < nLen; i++) {
+ for (size_t i = aPos.value() + 1; i < nLen; i++) {
wchar_t wch = (*str)[i];
if (wch == L'-' || FXSYS_iswalnum(wch))
continue;
if (wch != L'.' || i == pPos + 1) {
// Domain name should end before invalid char.
- FX_STRSIZE host_end = i == pPos + 1 ? i - 2 : i - 1;
+ size_t host_end = i == pPos + 1 ? i - 2 : i - 1;
if (pPos > 0 && host_end - aPos.value() >= 3) {
// Trim the ending invalid chars if there is at least one '.' and name.
*str = str->Left(host_end + 1);