summary | refs | log | tree | commit | diff
path: root/core/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext')
-rw-r--r-- core/fpdftext/cpdf_linkextract.cpp 50
-rw-r--r-- core/fpdftext/cpdf_textpage.cpp 26
-rw-r--r-- core/fpdftext/cpdf_textpagefind.cpp 45
-rw-r--r-- core/fpdftext/cpdf_textpagefind.h 10
4 files changed, 63 insertions, 68 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index 91116711e6..3a38343721 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp
@@ -19,9 +19,7 @@ namespace {
// |end|. The purpose of this function is to separate url from the surrounding
// context characters, we do not intend to fully validate the url. |str|
// contains lower case characters only.
-FX_STRSIZE FindWebLinkEnding(const WideString& str,
- FX_STRSIZE start,
- FX_STRSIZE end) {
+size_t FindWebLinkEnding(const WideString& str, size_t start, size_t end) {
if (str.Contains(L'/', start)) {
// When there is a path and query after '/', most ASCII chars are allowed.
// We don't sanitize in this case.
@@ -37,8 +35,8 @@ FX_STRSIZE FindWebLinkEnding(const WideString& str,
if (result.has_value()) {
end = result.value();
if (end > start + 1) { // Has content inside brackets.
- FX_STRSIZE len = str.GetLength();
- FX_STRSIZE off = end + 1;
+ size_t len = str.GetLength();
+ size_t off = end + 1;
if (off < len && str[off] == L':') {
off++;
while (off < len && str[off] >= L'0' && str[off] <= L'9')
@@ -69,9 +67,9 @@ FX_STRSIZE FindWebLinkEnding(const WideString& str,
// |end| if characters were removed.
void TrimBackwardsToChar(const WideString& str,
wchar_t charToFind,
- FX_STRSIZE start,
- FX_STRSIZE* end) {
- for (FX_STRSIZE pos = *end; pos >= start; pos--) {
+ size_t start,
+ size_t* end) {
+ for (size_t pos = *end; pos >= start; pos--) {
if (str[pos] == charToFind) {
*end = pos - 1;
break;
@@ -83,10 +81,10 @@ void TrimBackwardsToChar(const WideString& str,
// |start| and |end| in |str|. Matches a closing bracket or quote for each
// opening character and, if present, removes everything afterwards. Returns the
// new end position for the string.
-FX_STRSIZE TrimExternalBracketsFromWebLink(const WideString& str,
- FX_STRSIZE start,
- FX_STRSIZE end) {
- for (FX_STRSIZE pos = 0; pos < start; pos++) {
+size_t TrimExternalBracketsFromWebLink(const WideString& str,
+ size_t start,
+ size_t end) {
+ for (size_t pos = 0; pos < start; pos++) {
if (str[pos] == '(') {
TrimBackwardsToChar(str, ')', start, &end);
} else if (str[pos] == '[') {
@@ -191,25 +189,25 @@ bool CPDF_LinkExtract::CheckWebLink(WideString* strBeCheck,
int32_t* nStart,
int32_t* nCount) {
static const wchar_t kHttpScheme[] = L"http";
- static const FX_STRSIZE kHttpSchemeLen = FXSYS_len(kHttpScheme);
+ static const size_t kHttpSchemeLen = FXSYS_len(kHttpScheme);
static const wchar_t kWWWAddrStart[] = L"www.";
- static const FX_STRSIZE kWWWAddrStartLen = FXSYS_len(kWWWAddrStart);
+ static const size_t kWWWAddrStartLen = FXSYS_len(kWWWAddrStart);
WideString str = *strBeCheck;
str.MakeLower();
- FX_STRSIZE len = str.GetLength();
+ size_t len = str.GetLength();
// First, try to find the scheme.
auto start = str.Find(kHttpScheme);
if (start.has_value()) {
- FX_STRSIZE off = start.value() + kHttpSchemeLen; // move after "http".
+ size_t off = start.value() + kHttpSchemeLen; // move after "http".
if (len > off + 4) { // At least "://<char>" follows.
if (str[off] == L's') // "https" scheme is accepted.
off++;
if (str[off] == L':' && str[off + 1] == L'/' && str[off + 2] == L'/') {
off += 3;
- FX_STRSIZE end = TrimExternalBracketsFromWebLink(str, start.value(),
- str.GetLength() - 1);
+ size_t end = TrimExternalBracketsFromWebLink(str, start.value(),
+ str.GetLength() - 1);
end = FindWebLinkEnding(str, off, end);
if (end > off) { // Non-empty host name.
*nStart = start.value();
@@ -224,8 +222,8 @@ bool CPDF_LinkExtract::CheckWebLink(WideString* strBeCheck,
// When there is no scheme, try to find url starting with "www.".
start = str.Find(kWWWAddrStart);
if (start.has_value() && len > start.value() + kWWWAddrStartLen) {
- FX_STRSIZE end = TrimExternalBracketsFromWebLink(str, start.value(),
- str.GetLength() - 1);
+ size_t end = TrimExternalBracketsFromWebLink(str, start.value(),
+ str.GetLength() - 1);
end = FindWebLinkEnding(str, start.value(), end);
if (end > start.value() + kWWWAddrStartLen) {
*nStart = start.value();
@@ -244,8 +242,8 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) {
return false;
// Check the local part.
- FX_STRSIZE pPos = aPos.value(); // Used to track the position of '@' or '.'.
- for (FX_STRSIZE i = aPos.value(); i > 0; i--) {
+ size_t pPos = aPos.value(); // Used to track the position of '@' or '.'.
+ for (size_t i = aPos.value(); i > 0; i--) {
wchar_t ch = (*str)[i - 1];
if (ch == L'_' || ch == L'-' || FXSYS_iswalnum(ch))
continue;
@@ -257,7 +255,7 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) {
}
// End extracting for other invalid chars, '.' at the beginning, or
// consecutive '.'.
- FX_STRSIZE removed_len = i == pPos ? i + 1 : i;
+ size_t removed_len = i == pPos ? i + 1 : i;
*str = str->Right(str->GetLength() - removed_len);
break;
}
@@ -279,16 +277,16 @@ bool CPDF_LinkExtract::CheckMailLink(WideString* str) {
return false;
// Validate all other chars in domain name.
- FX_STRSIZE nLen = str->GetLength();
+ size_t nLen = str->GetLength();
pPos = 0; // Used to track the position of '.'.
- for (FX_STRSIZE i = aPos.value() + 1; i < nLen; i++) {
+ for (size_t i = aPos.value() + 1; i < nLen; i++) {
wchar_t wch = (*str)[i];
if (wch == L'-' || FXSYS_iswalnum(wch))
continue;
if (wch != L'.' || i == pPos + 1) {
// Domain name should end before invalid char.
- FX_STRSIZE host_end = i == pPos + 1 ? i - 2 : i - 1;
+ size_t host_end = i == pPos + 1 ? i - 2 : i - 1;
if (pPos > 0 && host_end - aPos.value() >= 3) {
// Trim the ending invalid chars if there is at least one '.' and name.
*str = str->Left(host_end + 1);
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index f73793cd72..fd33fb2f2f 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -66,7 +66,7 @@ float CalculateBaseSpace(const CPDF_TextObject* pTextObj,
return baseSpace;
}
-FX_STRSIZE Unicode_GetNormalization(wchar_t wch, wchar_t* pDst) {
+size_t Unicode_GetNormalization(wchar_t wch, wchar_t* pDst) {
wch = wch & 0xFFFF;
wchar_t wFind = g_UnicodeData_Normalization[wch];
if (!wFind) {
@@ -93,7 +93,7 @@ FX_STRSIZE Unicode_GetNormalization(wchar_t wch, wchar_t* pDst) {
while (n--)
*pDst++ = *pMap++;
}
- return (FX_STRSIZE)wFind;
+ return static_cast<size_t>(wFind);
}
float MaskPercentFilled(const std::vector<bool>& mask,
@@ -649,11 +649,11 @@ void CPDF_TextPage::AddCharInfoByLRDirection(wchar_t wChar,
info.m_Index = m_TextBuf.GetLength();
if (wChar >= 0xFB00 && wChar <= 0xFB06) {
wchar_t* pDst = nullptr;
- FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
+ size_t nCount = Unicode_GetNormalization(wChar, pDst);
if (nCount >= 1) {
pDst = FX_Alloc(wchar_t, nCount);
Unicode_GetNormalization(wChar, pDst);
- for (FX_STRSIZE nIndex = 0; nIndex < nCount; nIndex++) {
+ for (size_t nIndex = 0; nIndex < nCount; nIndex++) {
PAGECHAR_INFO info2 = info;
info2.m_Unicode = pDst[nIndex];
info2.m_Flag = FPDFTEXT_CHAR_PIECE;
@@ -679,11 +679,11 @@ void CPDF_TextPage::AddCharInfoByRLDirection(wchar_t wChar,
info.m_Index = m_TextBuf.GetLength();
wChar = FX_GetMirrorChar(wChar);
wchar_t* pDst = nullptr;
- FX_STRSIZE nCount = Unicode_GetNormalization(wChar, pDst);
+ size_t nCount = Unicode_GetNormalization(wChar, pDst);
if (nCount >= 1) {
pDst = FX_Alloc(wchar_t, nCount);
Unicode_GetNormalization(wChar, pDst);
- for (FX_STRSIZE nIndex = 0; nIndex < nCount; nIndex++) {
+ for (size_t nIndex = 0; nIndex < nCount; nIndex++) {
PAGECHAR_INFO info2 = info;
info2.m_Unicode = pDst[nIndex];
info2.m_Flag = FPDFTEXT_CHAR_PIECE;
@@ -704,7 +704,7 @@ void CPDF_TextPage::CloseTempLine() {
WideString str = m_TempTextBuf.MakeString();
bool bPrevSpace = false;
- for (FX_STRSIZE i = 0; i < str.GetLength(); i++) {
+ for (size_t i = 0; i < str.GetLength(); i++) {
if (str[i] != ' ') {
bPrevSpace = false;
continue;
@@ -838,13 +838,12 @@ FPDFText_MarkedContent CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
return FPDFText_MarkedContent::Done;
}
- FX_STRSIZE nItems = actText.GetLength();
- if (nItems < 1)
+ if (actText.IsEmpty())
return FPDFText_MarkedContent::Pass;
CPDF_Font* pFont = pTextObj->GetFont();
bExist = false;
- for (FX_STRSIZE i = 0; i < nItems; i++) {
+ for (size_t i = 0; i < actText.GetLength(); i++) {
if (pFont->CharCodeFromUnicode(actText[i]) != CPDF_Font::kInvalidCharCode) {
bExist = true;
break;
@@ -854,7 +853,7 @@ FPDFText_MarkedContent CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
return FPDFText_MarkedContent::Pass;
bExist = false;
- for (FX_STRSIZE i = 0; i < nItems; i++) {
+ for (size_t i = 0; i < actText.GetLength(); i++) {
wchar_t wChar = actText[i];
if ((wChar > 0x80 && wChar < 0xFFFD) || (wChar <= 0x80 && isprint(wChar))) {
bExist = true;
@@ -883,15 +882,14 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
if (pDict)
actText = pDict->GetUnicodeTextFor("ActualText");
}
- FX_STRSIZE nItems = actText.GetLength();
- if (nItems < 1)
+ if (actText.IsEmpty())
return;
CPDF_Font* pFont = pTextObj->GetFont();
CFX_Matrix matrix = pTextObj->GetTextMatrix();
matrix.Concat(Obj.m_formMatrix);
- for (FX_STRSIZE k = 0; k < nItems; k++) {
+ for (size_t k = 0; k < actText.GetLength(); k++) {
wchar_t wChar = actText[k];
if (wChar <= 0x80 && !isprint(wChar))
wChar = 0x20;
diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp
index f00b8a9f4d..9f9be202d6 100644
--- a/core/fpdftext/cpdf_textpagefind.cpp
+++ b/core/fpdftext/cpdf_textpagefind.cpp
@@ -81,7 +81,7 @@ int CPDF_TextPageFind::GetCharIndex(int index) const {
bool CPDF_TextPageFind::FindFirst(const WideString& findwhat,
int flags,
- pdfium::Optional<FX_STRSIZE> startPos) {
+ pdfium::Optional<size_t> startPos) {
if (!m_pTextPage)
return false;
if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE))
@@ -94,7 +94,7 @@ bool CPDF_TextPageFind::FindFirst(const WideString& findwhat,
m_IsFind = false;
return true;
}
- FX_STRSIZE len = findwhatStr.GetLength();
+ size_t len = findwhatStr.GetLength();
if (!m_bMatchCase) {
findwhatStr.MakeLower();
m_strText.MakeLower();
@@ -102,25 +102,24 @@ bool CPDF_TextPageFind::FindFirst(const WideString& findwhat,
m_bMatchWholeWord = !!(flags & FPDFTEXT_MATCHWHOLEWORD);
m_findNextStart = startPos;
if (!startPos.has_value()) {
- if (m_strText.GetLength() > 0)
+ if (!m_strText.IsEmpty())
m_findPreStart = m_strText.GetLength() - 1;
} else {
m_findPreStart = startPos;
}
m_csFindWhatArray.clear();
- FX_STRSIZE i = 0;
- while (i < len) {
+ size_t i = 0;
+ for (i = 0; i < len; ++i)
if (findwhatStr[i] != ' ')
break;
- i++;
- }
if (i < len)
ExtractFindWhat(findwhatStr);
else
m_csFindWhatArray.push_back(findwhatStr);
if (m_csFindWhatArray.empty())
return false;
+
m_IsFind = true;
m_resStart = 0;
m_resEnd = -1;
@@ -137,14 +136,14 @@ bool CPDF_TextPageFind::FindNext() {
m_IsFind = false;
return m_IsFind;
}
- FX_STRSIZE strLen = m_strText.GetLength();
+ size_t strLen = m_strText.GetLength();
if (m_findNextStart.value() > strLen - 1) {
m_IsFind = false;
return m_IsFind;
}
int nCount = pdfium::CollectionSize<int>(m_csFindWhatArray);
- pdfium::Optional<FX_STRSIZE> nResultPos = 0;
- FX_STRSIZE nStartPos = m_findNextStart.value();
+ pdfium::Optional<size_t> nResultPos = 0;
+ size_t nStartPos = m_findNextStart.value();
bool bSpaceStart = false;
for (int iWord = 0; iWord < nCount; iWord++) {
WideString csWord = m_csFindWhatArray[iWord];
@@ -162,18 +161,17 @@ bool CPDF_TextPageFind::FindNext() {
}
continue;
}
- FX_STRSIZE endIndex;
nResultPos = m_strText.Find(csWord.c_str(), nStartPos);
if (!nResultPos.has_value()) {
m_IsFind = false;
return m_IsFind;
}
- endIndex = nResultPos.value() + csWord.GetLength() - 1;
+ size_t endIndex = nResultPos.value() + csWord.GetLength() - 1;
if (iWord == 0)
m_resStart = nResultPos.value();
bool bMatch = true;
if (iWord != 0 && !bSpaceStart) {
- FX_STRSIZE PreResEndPos = nStartPos;
+ size_t PreResEndPos = nStartPos;
int curChar = csWord[0];
WideString lastWord = m_csFindWhatArray[iWord - 1];
int lastChar = lastWord[lastWord.GetLength() - 1];
@@ -182,7 +180,7 @@ bool CPDF_TextPageFind::FindNext() {
IsIgnoreSpaceCharacter(curChar))) {
bMatch = false;
}
- for (FX_STRSIZE d = PreResEndPos; d < nResultPos.value(); d++) {
+ for (size_t d = PreResEndPos; d < nResultPos.value(); d++) {
wchar_t strInsert = m_strText[d];
if (strInsert != TEXT_LINEFEED_CHAR && strInsert != TEXT_SPACE_CHAR &&
strInsert != TEXT_RETURN_CHAR && strInsert != 160) {
@@ -238,20 +236,21 @@ bool CPDF_TextPageFind::FindPrev() {
return m_IsFind;
}
CPDF_TextPageFind findEngine(m_pTextPage.Get());
- bool ret = findEngine.FindFirst(m_findWhat, m_flags,
- pdfium::Optional<FX_STRSIZE>(0));
+ bool ret =
+ findEngine.FindFirst(m_findWhat, m_flags, pdfium::Optional<size_t>(0));
if (!ret) {
m_IsFind = false;
return m_IsFind;
}
- int order = -1, MatchedCount = 0;
+ int order = -1;
+ int MatchedCount = 0;
while (ret) {
ret = findEngine.FindNext();
if (ret) {
int order1 = findEngine.GetCurOrder();
int MatchedCount1 = findEngine.GetMatchedCount();
- if (static_cast<FX_STRSIZE>((order1 + MatchedCount1)) >
- m_findPreStart.value() + 1)
+ int temp = order1 + MatchedCount1;
+ if (temp < 0 || static_cast<size_t>(temp) > m_findPreStart.value() + 1)
break;
order = order1;
MatchedCount = MatchedCount1;
@@ -292,7 +291,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const WideString& findwhat) {
break;
}
}
- FX_STRSIZE pos = 0;
+ size_t pos = 0;
while (pos < csWord.GetLength()) {
WideString curStr = csWord.Mid(pos, 1);
wchar_t curChar = csWord[pos];
@@ -321,13 +320,13 @@ void CPDF_TextPageFind::ExtractFindWhat(const WideString& findwhat) {
}
bool CPDF_TextPageFind::IsMatchWholeWord(const WideString& csPageText,
- FX_STRSIZE startPos,
- FX_STRSIZE endPos) {
+ size_t startPos,
+ size_t endPos) {
if (startPos > endPos)
return false;
wchar_t char_left = 0;
wchar_t char_right = 0;
- FX_STRSIZE char_count = endPos - startPos + 1;
+ size_t char_count = endPos - startPos + 1;
if (char_count == 0)
return false;
if (char_count == 1 && csPageText[startPos] > 255)
diff --git a/core/fpdftext/cpdf_textpagefind.h b/core/fpdftext/cpdf_textpagefind.h
index face4e46b4..574f05ed0d 100644
--- a/core/fpdftext/cpdf_textpagefind.h
+++ b/core/fpdftext/cpdf_textpagefind.h
@@ -24,7 +24,7 @@ class CPDF_TextPageFind {
bool FindFirst(const WideString& findwhat,
int flags,
- pdfium::Optional<FX_STRSIZE> startPos);
+ pdfium::Optional<size_t> startPos);
bool FindNext();
bool FindPrev();
int GetCurOrder() const;
@@ -33,8 +33,8 @@ class CPDF_TextPageFind {
protected:
void ExtractFindWhat(const WideString& findwhat);
bool IsMatchWholeWord(const WideString& csPageText,
- FX_STRSIZE startPos,
- FX_STRSIZE endPos);
+ size_t startPos,
+ size_t endPos);
bool ExtractSubString(WideString& rString,
const wchar_t* lpszFullString,
int iSubString,
@@ -48,8 +48,8 @@ class CPDF_TextPageFind {
WideString m_findWhat;
int m_flags;
std::vector<WideString> m_csFindWhatArray;
- pdfium::Optional<FX_STRSIZE> m_findNextStart;
- pdfium::Optional<FX_STRSIZE> m_findPreStart;
+ pdfium::Optional<size_t> m_findNextStart;
+ pdfium::Optional<size_t> m_findPreStart;
bool m_bMatchCase;
bool m_bMatchWholeWord;
int m_resStart;