summary | refs | log | tree | commit | diff
path: root/core/fpdftext/cpdf_linkextract.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext/cpdf_linkextract.cpp')
-rw-r--r-- core/fpdftext/cpdf_linkextract.cpp | 50
1 files changed, 31 insertions, 19 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index 315cff12a0..98bf915519 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp
@@ -121,9 +121,15 @@ void CPDF_LinkExtract::ParseLink() {
}
// Check for potential web URLs and email addresses.
// Ftp address, file system links, data, blob etc. are not checked.
- if (nCount > 5 &&
- (CheckWebLink(strBeCheck) || CheckMailLink(strBeCheck))) {
- m_LinkArray.push_back({start, nCount, strBeCheck});
+ if (nCount > 5) {
+ int32_t nStartOffset;
+ int32_t nCountOverload;
+ if (CheckWebLink(&strBeCheck, &nStartOffset, &nCountOverload)) {
+ m_LinkArray.push_back(
+ {start + nStartOffset, nCountOverload, strBeCheck});
+ } else if (CheckMailLink(&strBeCheck)) {
+ m_LinkArray.push_back({start, nCount, strBeCheck});
+ }
}
}
start = ++pos;
@@ -136,13 +142,15 @@ void CPDF_LinkExtract::ParseLink() {
}
}
-bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
+bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck,
+ int32_t* nStart,
+ int32_t* nCount) {
static const wchar_t kHttpScheme[] = L"http";
static const FX_STRSIZE kHttpSchemeLen = FXSYS_len(kHttpScheme);
static const wchar_t kWWWAddrStart[] = L"www.";
static const FX_STRSIZE kWWWAddrStartLen = FXSYS_len(kWWWAddrStart);
- CFX_WideString str = strBeCheck;
+ CFX_WideString str = *strBeCheck;
str.MakeLower();
FX_STRSIZE len = str.GetLength();
@@ -156,7 +164,9 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
if (str[off] == L':' && str[off + 1] == L'/' && str[off + 2] == L'/') {
FX_STRSIZE end = FindWebLinkEnding(str, off + 3);
if (end > off + 3) { // Non-empty host name.
- strBeCheck = strBeCheck.Mid(start, end - start + 1);
+ *nStart = start;
+ *nCount = end - start + 1;
+ *strBeCheck = strBeCheck->Mid(*nStart, *nCount);
return true;
}
}
@@ -168,15 +178,17 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString& strBeCheck) {
if (start != -1 && len > start + kWWWAddrStartLen) {
FX_STRSIZE end = FindWebLinkEnding(str, start);
if (end > start + kWWWAddrStartLen) {
- strBeCheck = L"http://" + strBeCheck.Mid(start, end - start + 1);
+ *nStart = start;
+ *nCount = end - start + 1;
+ *strBeCheck = L"http://" + strBeCheck->Mid(*nStart, *nCount);
return true;
}
}
return false;
}
-bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
- int aPos = str.Find(L'@');
+bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) {
+ int aPos = str->Find(L'@');
// Invalid when no '@'.
if (aPos < 1)
return false;
@@ -184,7 +196,7 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
// Check the local part.
int pPos = aPos; // Used to track the position of '@' or '.'.
for (int i = aPos - 1; i >= 0; i--) {
- wchar_t ch = str.GetAt(i);
+ wchar_t ch = str->GetAt(i);
if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch))
continue;
@@ -196,7 +208,7 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
// End extracting for other invalid chars, '.' at the beginning, or
// consecutive '.'.
int removed_len = i == pPos - 1 ? i + 2 : i + 1;
- str = str.Right(str.GetLength() - removed_len);
+ *str = str->Right(str->GetLength() - removed_len);
break;
}
// Found a valid '.'.
@@ -204,23 +216,23 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
}
// Check the domain name part.
- aPos = str.Find(L'@');
+ aPos = str->Find(L'@');
if (aPos < 1)
return false;
- str.TrimRight(L'.');
+ str->TrimRight(L'.');
// At least one '.' in domain name, but not at the beginning.
// TODO(weili): RFC5322 allows domain names to be a local name without '.'.
// Check whether we should remove this check.
- int ePos = str.Find(L'.', aPos + 1);
+ int ePos = str->Find(L'.', aPos + 1);
if (ePos == -1 || ePos == aPos + 1)
return false;
// Validate all other chars in domain name.
- int nLen = str.GetLength();
+ int nLen = str->GetLength();
pPos = 0; // Used to track the position of '.'.
for (int i = aPos + 1; i < nLen; i++) {
- wchar_t wch = str.GetAt(i);
+ wchar_t wch = str->GetAt(i);
if (wch == L'-' || FXSYS_iswalnum(wch))
continue;
@@ -229,7 +241,7 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
int host_end = i == pPos + 1 ? i - 2 : i - 1;
if (pPos > 0 && host_end - aPos >= 3) {
// Trim the ending invalid chars if there is at least one '.' and name.
- str = str.Left(host_end + 1);
+ *str = str->Left(host_end + 1);
break;
}
return false;
@@ -237,8 +249,8 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString& str) {
pPos = i;
}
- if (str.Find(L"mailto:") == -1)
- str = L"mailto:" + str;
+ if (str->Find(L"mailto:") == -1)
+ *str = L"mailto:" + *str;
return true;
}