summary refs log tree commit diff
path: root/core/fpdftext
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdftext')
-rw-r--r-- core/fpdftext/cpdf_linkextract.cpp 16
-rw-r--r-- core/fpdftext/cpdf_linkextract.h 10
-rw-r--r-- core/fpdftext/cpdf_linkextract_unittest.cpp 10
-rw-r--r-- core/fpdftext/cpdf_textpage.cpp 49
-rw-r--r-- core/fpdftext/cpdf_textpage.h 4
-rw-r--r-- core/fpdftext/cpdf_textpagefind.cpp 18
-rw-r--r-- core/fpdftext/cpdf_textpagefind.h 14
7 files changed, 60 insertions, 61 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp
index a5eafe689a..91116711e6 100644
--- a/core/fpdftext/cpdf_linkextract.cpp
+++ b/core/fpdftext/cpdf_linkextract.cpp
@@ -19,7 +19,7 @@ namespace {
// |end|. The purpose of this function is to separate url from the surrounding
// context characters, we do not intend to fully validate the url. |str|
// contains lower case characters only.
-FX_STRSIZE FindWebLinkEnding(const CFX_WideString& str,
+FX_STRSIZE FindWebLinkEnding(const WideString& str,
FX_STRSIZE start,
FX_STRSIZE end) {
if (str.Contains(L'/', start)) {
@@ -67,7 +67,7 @@ FX_STRSIZE FindWebLinkEnding(const CFX_WideString& str,
// Remove characters from the end of |str|, delimited by |start| and |end|, up
// to and including |charToFind|. No-op if |charToFind| is not present. Updates
// |end| if characters were removed.
-void TrimBackwardsToChar(const CFX_WideString& str,
+void TrimBackwardsToChar(const WideString& str,
wchar_t charToFind,
FX_STRSIZE start,
FX_STRSIZE* end) {
@@ -83,7 +83,7 @@ void TrimBackwardsToChar(const CFX_WideString& str,
// |start| and |end| in |str|. Matches a closing bracket or quote for each
// opening character and, if present, removes everything afterwards. Returns the
// new end position for the string.
-FX_STRSIZE TrimExternalBracketsFromWebLink(const CFX_WideString& str,
+FX_STRSIZE TrimExternalBracketsFromWebLink(const WideString& str,
FX_STRSIZE start,
FX_STRSIZE end) {
for (FX_STRSIZE pos = 0; pos < start; pos++) {
@@ -144,7 +144,7 @@ void CPDF_LinkExtract::ParseLink() {
pos++;
continue;
}
- CFX_WideString strBeCheck;
+ WideString strBeCheck;
strBeCheck = m_pTextPage->GetPageText(start, nCount);
if (bLineBreak) {
strBeCheck.Remove(TEXT_LINEFEED_CHAR);
@@ -187,7 +187,7 @@ void CPDF_LinkExtract::ParseLink() {
}
}
-bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck,
+bool CPDF_LinkExtract::CheckWebLink(WideString* strBeCheck,
int32_t* nStart,
int32_t* nCount) {
static const wchar_t kHttpScheme[] = L"http";
@@ -195,7 +195,7 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck,
static const wchar_t kWWWAddrStart[] = L"www.";
static const FX_STRSIZE kWWWAddrStartLen = FXSYS_len(kWWWAddrStart);
- CFX_WideString str = *strBeCheck;
+ WideString str = *strBeCheck;
str.MakeLower();
FX_STRSIZE len = str.GetLength();
@@ -237,7 +237,7 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck,
return false;
}
-bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) {
+bool CPDF_LinkExtract::CheckMailLink(WideString* str) {
auto aPos = str->Find(L'@');
// Invalid when no '@' or when starts/ends with '@'.
if (!aPos.has_value() || aPos.value() == 0 || aPos == str->GetLength() - 1)
@@ -305,7 +305,7 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) {
return true;
}
-CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const {
+WideString CPDF_LinkExtract::GetURL(size_t index) const {
return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L"";
}
diff --git a/core/fpdftext/cpdf_linkextract.h b/core/fpdftext/cpdf_linkextract.h
index 5c022d3d78..db82deb684 100644
--- a/core/fpdftext/cpdf_linkextract.h
+++ b/core/fpdftext/cpdf_linkextract.h
@@ -22,23 +22,23 @@ class CPDF_LinkExtract {
void ExtractLinks();
size_t CountLinks() const { return m_LinkArray.size(); }
- CFX_WideString GetURL(size_t index) const;
+ WideString GetURL(size_t index) const;
std::vector<CFX_FloatRect> GetRects(size_t index) const;
protected:
void ParseLink();
- bool CheckWebLink(CFX_WideString* str, int32_t* nStart, int32_t* nCount);
- bool CheckMailLink(CFX_WideString* str);
+ bool CheckWebLink(WideString* str, int32_t* nStart, int32_t* nCount);
+ bool CheckMailLink(WideString* str);
private:
struct Link {
int m_Start;
int m_Count;
- CFX_WideString m_strUrl;
+ WideString m_strUrl;
};
CFX_UnownedPtr<const CPDF_TextPage> const m_pTextPage;
- CFX_WideString m_strPageText;
+ WideString m_strPageText;
std::vector<Link> m_LinkArray;
};
diff --git a/core/fpdftext/cpdf_linkextract_unittest.cpp b/core/fpdftext/cpdf_linkextract_unittest.cpp
index efeb53ebe2..30438e6c10 100644
--- a/core/fpdftext/cpdf_linkextract_unittest.cpp
+++ b/core/fpdftext/cpdf_linkextract_unittest.cpp
@@ -32,7 +32,7 @@ TEST(CPDF_LinkExtractTest, CheckMailLink) {
};
for (size_t i = 0; i < FX_ArraySize(invalid_strs); ++i) {
const wchar_t* const input = invalid_strs[i];
- CFX_WideString text_str(input);
+ WideString text_str(input);
EXPECT_FALSE(extractor.CheckMailLink(&text_str)) << input;
}
@@ -53,8 +53,8 @@ TEST(CPDF_LinkExtractTest, CheckMailLink) {
};
for (size_t i = 0; i < FX_ArraySize(valid_strs); ++i) {
const wchar_t* const input = valid_strs[i][0];
- CFX_WideString text_str(input);
- CFX_WideString expected_str(L"mailto:");
+ WideString text_str(input);
+ WideString expected_str(L"mailto:");
expected_str += valid_strs[i][1];
EXPECT_TRUE(extractor.CheckMailLink(&text_str)) << input;
EXPECT_STREQ(expected_str.c_str(), text_str.c_str());
@@ -80,7 +80,7 @@ TEST(CPDF_LinkExtractTest, CheckWebLink) {
const int32_t DEFAULT_VALUE = -42;
for (size_t i = 0; i < FX_ArraySize(invalid_cases); ++i) {
const wchar_t* const input = invalid_cases[i];
- CFX_WideString text_str(input);
+ WideString text_str(input);
int32_t start_offset = DEFAULT_VALUE;
int32_t count = DEFAULT_VALUE;
EXPECT_FALSE(extractor.CheckWebLink(&text_str, &start_offset, &count))
@@ -175,7 +175,7 @@ TEST(CPDF_LinkExtractTest, CheckWebLink) {
};
for (size_t i = 0; i < FX_ArraySize(valid_cases); ++i) {
const wchar_t* const input = valid_cases[i].input_string;
- CFX_WideString text_str(input);
+ WideString text_str(input);
int32_t start_offset = DEFAULT_VALUE;
int32_t count = DEFAULT_VALUE;
EXPECT_TRUE(extractor.CheckWebLink(&text_str, &start_offset, &count))
diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp
index ee1d51bfd8..f73793cd72 100644
--- a/core/fpdftext/cpdf_textpage.cpp
+++ b/core/fpdftext/cpdf_textpage.cpp
@@ -348,14 +348,14 @@ int CPDF_TextPage::GetIndexAtPos(const CFX_PointF& point,
return pos < pdfium::CollectionSize<int>(m_CharList) ? pos : NearPos;
}
-CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
+WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const {
if (!m_bIsParsed)
- return CFX_WideString();
+ return WideString();
float posy = 0;
bool IsContainPreChar = false;
bool IsAddLineFeed = false;
- CFX_WideString strText;
+ WideString strText;
for (const auto& charinfo : m_CharList) {
if (IsRectIntersect(rect, charinfo.m_CharBox)) {
if (fabs(posy - charinfo.m_Origin.y) > 0 && !IsContainPreChar &&
@@ -436,7 +436,7 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start,
}
}
-CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
+WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
if (!m_bIsParsed || nCount == 0)
return L"";
@@ -445,8 +445,8 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
if (nCount == -1) {
nCount = pdfium::CollectionSize<int>(m_CharList) - start;
- CFX_WideStringC wsTextBuf = m_TextBuf.AsStringC();
- return CFX_WideString(wsTextBuf.Right(wsTextBuf.GetLength() - start));
+ WideStringView wsTextBuf = m_TextBuf.AsStringView();
+ return WideString(wsTextBuf.Right(wsTextBuf.GetLength() - start));
}
if (nCount <= 0 || m_CharList.empty())
return L"";
@@ -478,7 +478,7 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const {
nCount = start + nCount - nCountOffset - startindex;
if (nCount <= 0)
return L"";
- return CFX_WideString(m_TextBuf.AsStringC().Mid(startindex, nCount));
+ return WideString(m_TextBuf.AsStringView().Mid(startindex, nCount));
}
int CPDF_TextPage::CountRects(int start, int nCount) {
@@ -630,7 +630,7 @@ int CPDF_TextPage::GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const {
if (int w = pFont->GetCharWidthF(charCode))
return w;
- CFX_ByteString str;
+ ByteString str;
pFont->AppendChar(&str, charCode);
if (int w = pFont->GetStringWidth(str.c_str(), 1))
return w;
@@ -702,7 +702,7 @@ void CPDF_TextPage::CloseTempLine() {
if (m_TempCharList.empty())
return;
- CFX_WideString str = m_TempTextBuf.MakeString();
+ WideString str = m_TempTextBuf.MakeString();
bool bPrevSpace = false;
for (FX_STRSIZE i = 0; i < str.GetLength(); i++) {
if (str[i] != ' ') {
@@ -814,7 +814,7 @@ FPDFText_MarkedContent CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) {
if (nContentMark < 1)
return FPDFText_MarkedContent::Pass;
- CFX_WideString actText;
+ WideString actText;
bool bExist = false;
CPDF_Dictionary* pDict = nullptr;
int n = 0;
@@ -876,7 +876,7 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) {
if (nContentMark < 1)
return;
- CFX_WideString actText;
+ WideString actText;
for (int n = 0; n < nContentMark; n++) {
const CPDF_ContentMarkItem& item = pTextObj->m_ContentMark.GetItem(n);
CPDF_Dictionary* pDict = item.GetParam();
@@ -941,13 +941,13 @@ void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend,
bool CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj,
const CPDF_Font* pFont,
int nItems) const {
- CFX_WideString str;
+ WideString str;
for (int32_t i = 0; i < nItems; i++) {
CPDF_TextObjectItem item;
pTextObj->GetItemInfo(i, &item);
if (item.m_CharCode == static_cast<uint32_t>(-1))
continue;
- CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
+ WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
wchar_t wChar = !wstrItem.IsEmpty() ? wstrItem[0] : 0;
if (wChar == 0)
wChar = item.m_CharCode;
@@ -1004,7 +1004,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
if (pTextObj->CountChars() == 1) {
CPDF_TextObjectItem item;
pTextObj->GetCharInfo(0, &item);
- CFX_WideString wstrItem =
+ WideString wstrItem =
pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
if (wstrItem.IsEmpty())
wstrItem += (wchar_t)item.m_CharCode;
@@ -1013,7 +1013,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
return;
}
while (m_TempTextBuf.GetSize() > 0 &&
- m_TempTextBuf.AsStringC()[m_TempTextBuf.GetLength() - 1] ==
+ m_TempTextBuf.AsStringView()[m_TempTextBuf.GetLength() - 1] ==
0x20) {
m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
m_TempCharList.pop_back();
@@ -1053,9 +1053,9 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
PAGECHAR_INFO charinfo;
pTextObj->GetItemInfo(i, &item);
if (item.m_CharCode == static_cast<uint32_t>(-1)) {
- CFX_WideString str = m_TempTextBuf.MakeString();
+ WideString str = m_TempTextBuf.MakeString();
if (str.IsEmpty())
- str = m_TextBuf.AsStringC();
+ str = m_TextBuf.AsStringView();
if (str.IsEmpty() || str[str.GetLength() - 1] == TEXT_SPACE_CHAR)
continue;
@@ -1106,7 +1106,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
continue;
}
spacing = 0;
- CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
+ WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode);
bool bNoUnicode = false;
if (wstrItem.IsEmpty() && item.m_CharCode) {
wstrItem += static_cast<wchar_t>(item.m_CharCode);
@@ -1177,7 +1177,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) {
m_TempCharList.push_back(charinfo);
}
} else if (i == 0) {
- CFX_WideString str = m_TempTextBuf.MakeString();
+ WideString str = m_TempTextBuf.MakeString();
if (!str.IsEmpty() && str[str.GetLength() - 1] == TEXT_SPACE_CHAR) {
m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1);
m_TempCharList.pop_back();
@@ -1220,11 +1220,11 @@ CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode(
}
bool CPDF_TextPage::IsHyphen(wchar_t curChar) const {
- CFX_WideStringC curText;
+ WideStringView curText;
if (!m_TempTextBuf.IsEmpty())
- curText = m_TempTextBuf.AsStringC();
+ curText = m_TempTextBuf.AsStringView();
else if (!m_TextBuf.IsEmpty())
- curText = m_TextBuf.AsStringC();
+ curText = m_TextBuf.AsStringView();
else
return false;
@@ -1267,8 +1267,7 @@ CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
int nItem = m_pPreTextObj->CountItems();
m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem);
pObj->GetItemInfo(0, &item);
- CFX_WideString wstrItem =
- pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
+ WideString wstrItem = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode);
if (wstrItem.IsEmpty())
wstrItem += static_cast<wchar_t>(item.m_CharCode);
wchar_t curChar = wstrItem[0];
@@ -1356,7 +1355,7 @@ CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
IsHyphen(curChar)) {
return GenerateCharacter::Hyphen;
}
- CFX_WideString PrevStr =
+ WideString PrevStr =
m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode);
if (PrevStr.IsEmpty())
return GenerateCharacter::None;
diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h
index 41892ea97c..ef55ad05cb 100644
--- a/core/fpdftext/cpdf_textpage.h
+++ b/core/fpdftext/cpdf_textpage.h
@@ -106,8 +106,8 @@ class CPDF_TextPage {
void GetCharInfo(int index, FPDF_CHAR_INFO* info) const;
std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const;
int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const;
- CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const;
- CFX_WideString GetPageText(int start = 0, int nCount = -1) const;
+ WideString GetTextByRect(const CFX_FloatRect& rect) const;
+ WideString GetPageText(int start = 0, int nCount = -1) const;
int CountRects(int start, int nCount);
void GetRect(int rectIndex,
float& left,
diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp
index 3678e42515..f00b8a9f4d 100644
--- a/core/fpdftext/cpdf_textpagefind.cpp
+++ b/core/fpdftext/cpdf_textpagefind.cpp
@@ -79,14 +79,14 @@ int CPDF_TextPageFind::GetCharIndex(int index) const {
return m_pTextPage->CharIndexFromTextIndex(index);
}
-bool CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat,
+bool CPDF_TextPageFind::FindFirst(const WideString& findwhat,
int flags,
pdfium::Optional<FX_STRSIZE> startPos) {
if (!m_pTextPage)
return false;
if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE))
m_strText = m_pTextPage->GetPageText();
- CFX_WideString findwhatStr = findwhat;
+ WideString findwhatStr = findwhat;
m_findWhat = findwhatStr;
m_flags = flags;
m_bMatchCase = flags & FPDFTEXT_MATCHCASE;
@@ -147,7 +147,7 @@ bool CPDF_TextPageFind::FindNext() {
FX_STRSIZE nStartPos = m_findNextStart.value();
bool bSpaceStart = false;
for (int iWord = 0; iWord < nCount; iWord++) {
- CFX_WideString csWord = m_csFindWhatArray[iWord];
+ WideString csWord = m_csFindWhatArray[iWord];
if (csWord.IsEmpty()) {
if (iWord == nCount - 1) {
wchar_t strInsert = m_strText[nStartPos];
@@ -175,7 +175,7 @@ bool CPDF_TextPageFind::FindNext() {
if (iWord != 0 && !bSpaceStart) {
FX_STRSIZE PreResEndPos = nStartPos;
int curChar = csWord[0];
- CFX_WideString lastWord = m_csFindWhatArray[iWord - 1];
+ WideString lastWord = m_csFindWhatArray[iWord - 1];
int lastChar = lastWord[lastWord.GetLength() - 1];
if (nStartPos == nResultPos.value() &&
!(IsIgnoreSpaceCharacter(lastChar) ||
@@ -275,12 +275,12 @@ bool CPDF_TextPageFind::FindPrev() {
return m_IsFind;
}
-void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
+void CPDF_TextPageFind::ExtractFindWhat(const WideString& findwhat) {
if (findwhat.IsEmpty())
return;
int index = 0;
while (1) {
- CFX_WideString csWord = TEXT_EMPTY;
+ WideString csWord = TEXT_EMPTY;
int ret =
ExtractSubString(csWord, findwhat.c_str(), index, TEXT_SPACE_CHAR);
if (csWord.IsEmpty()) {
@@ -294,7 +294,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
}
FX_STRSIZE pos = 0;
while (pos < csWord.GetLength()) {
- CFX_WideString curStr = csWord.Mid(pos, 1);
+ WideString curStr = csWord.Mid(pos, 1);
wchar_t curChar = csWord[pos];
if (IsIgnoreSpaceCharacter(curChar)) {
if (pos > 0 && curChar == 0x2019) {
@@ -320,7 +320,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) {
}
}
-bool CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText,
+bool CPDF_TextPageFind::IsMatchWholeWord(const WideString& csPageText,
FX_STRSIZE startPos,
FX_STRSIZE endPos) {
if (startPos > endPos)
@@ -360,7 +360,7 @@ bool CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText,
return true;
}
-bool CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString,
+bool CPDF_TextPageFind::ExtractSubString(WideString& rString,
const wchar_t* lpszFullString,
int iSubString,
wchar_t chSep) {
diff --git a/core/fpdftext/cpdf_textpagefind.h b/core/fpdftext/cpdf_textpagefind.h
index cf8d3d1702..f9a28a0b0a 100644
--- a/core/fpdftext/cpdf_textpagefind.h
+++ b/core/fpdftext/cpdf_textpagefind.h
@@ -22,7 +22,7 @@ class CPDF_TextPageFind {
explicit CPDF_TextPageFind(const CPDF_TextPage* pTextPage);
~CPDF_TextPageFind();
- bool FindFirst(const CFX_WideString& findwhat,
+ bool FindFirst(const WideString& findwhat,
int flags,
pdfium::Optional<FX_STRSIZE> startPos);
bool FindNext();
@@ -31,11 +31,11 @@ class CPDF_TextPageFind {
int GetMatchedCount() const;
protected:
- void ExtractFindWhat(const CFX_WideString& findwhat);
- bool IsMatchWholeWord(const CFX_WideString& csPageText,
+ void ExtractFindWhat(const WideString& findwhat);
+ bool IsMatchWholeWord(const WideString& csPageText,
FX_STRSIZE startPos,
FX_STRSIZE endPos);
- bool ExtractSubString(CFX_WideString& rString,
+ bool ExtractSubString(WideString& rString,
const wchar_t* lpszFullString,
int iSubString,
wchar_t chSep);
@@ -44,10 +44,10 @@ class CPDF_TextPageFind {
private:
std::vector<uint16_t> m_CharIndex;
CFX_UnownedPtr<const CPDF_TextPage> m_pTextPage;
- CFX_WideString m_strText;
- CFX_WideString m_findWhat;
+ WideString m_strText;
+ WideString m_findWhat;
int m_flags;
- std::vector<CFX_WideString> m_csFindWhatArray;
+ std::vector<WideString> m_csFindWhatArray;
pdfium::Optional<FX_STRSIZE> m_findNextStart;
pdfium::Optional<FX_STRSIZE> m_findPreStart;
bool m_bMatchCase;