diff options
Diffstat (limited to 'core/fpdftext')
-rw-r--r-- | core/fpdftext/cpdf_linkextract.cpp | 16 | ||||
-rw-r--r-- | core/fpdftext/cpdf_linkextract.h | 10 | ||||
-rw-r--r-- | core/fpdftext/cpdf_linkextract_unittest.cpp | 10 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpage.cpp | 49 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpage.h | 4 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpagefind.cpp | 18 | ||||
-rw-r--r-- | core/fpdftext/cpdf_textpagefind.h | 14 |
7 files changed, 60 insertions, 61 deletions
diff --git a/core/fpdftext/cpdf_linkextract.cpp b/core/fpdftext/cpdf_linkextract.cpp index a5eafe689a..91116711e6 100644 --- a/core/fpdftext/cpdf_linkextract.cpp +++ b/core/fpdftext/cpdf_linkextract.cpp @@ -19,7 +19,7 @@ namespace { // |end|. The purpose of this function is to separate url from the surrounding // context characters, we do not intend to fully validate the url. |str| // contains lower case characters only. -FX_STRSIZE FindWebLinkEnding(const CFX_WideString& str, +FX_STRSIZE FindWebLinkEnding(const WideString& str, FX_STRSIZE start, FX_STRSIZE end) { if (str.Contains(L'/', start)) { @@ -67,7 +67,7 @@ FX_STRSIZE FindWebLinkEnding(const CFX_WideString& str, // Remove characters from the end of |str|, delimited by |start| and |end|, up // to and including |charToFind|. No-op if |charToFind| is not present. Updates // |end| if characters were removed. -void TrimBackwardsToChar(const CFX_WideString& str, +void TrimBackwardsToChar(const WideString& str, wchar_t charToFind, FX_STRSIZE start, FX_STRSIZE* end) { @@ -83,7 +83,7 @@ void TrimBackwardsToChar(const CFX_WideString& str, // |start| and |end| in |str|. Matches a closing bracket or quote for each // opening character and, if present, removes everything afterwards. Returns the // new end position for the string. -FX_STRSIZE TrimExternalBracketsFromWebLink(const CFX_WideString& str, +FX_STRSIZE TrimExternalBracketsFromWebLink(const WideString& str, FX_STRSIZE start, FX_STRSIZE end) { for (FX_STRSIZE pos = 0; pos < start; pos++) { @@ -144,7 +144,7 @@ void CPDF_LinkExtract::ParseLink() { pos++; continue; } - CFX_WideString strBeCheck; + WideString strBeCheck; strBeCheck = m_pTextPage->GetPageText(start, nCount); if (bLineBreak) { strBeCheck.Remove(TEXT_LINEFEED_CHAR); @@ -187,7 +187,7 @@ void CPDF_LinkExtract::ParseLink() { } } -bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck, +bool CPDF_LinkExtract::CheckWebLink(WideString* strBeCheck, int32_t* nStart, int32_t* nCount) { static const wchar_t kHttpScheme[] = L"http"; @@ -195,7 +195,7 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck, static const wchar_t kWWWAddrStart[] = L"www."; static const FX_STRSIZE kWWWAddrStartLen = FXSYS_len(kWWWAddrStart); - CFX_WideString str = *strBeCheck; + WideString str = *strBeCheck; str.MakeLower(); FX_STRSIZE len = str.GetLength(); @@ -237,7 +237,7 @@ bool CPDF_LinkExtract::CheckWebLink(CFX_WideString* strBeCheck, return false; } -bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) { +bool CPDF_LinkExtract::CheckMailLink(WideString* str) { auto aPos = str->Find(L'@'); // Invalid when no '@' or when starts/ends with '@'. if (!aPos.has_value() || aPos.value() == 0 || aPos == str->GetLength() - 1) @@ -305,7 +305,7 @@ bool CPDF_LinkExtract::CheckMailLink(CFX_WideString* str) { return true; } -CFX_WideString CPDF_LinkExtract::GetURL(size_t index) const { +WideString CPDF_LinkExtract::GetURL(size_t index) const { return index < m_LinkArray.size() ? m_LinkArray[index].m_strUrl : L""; } diff --git a/core/fpdftext/cpdf_linkextract.h b/core/fpdftext/cpdf_linkextract.h index 5c022d3d78..db82deb684 100644 --- a/core/fpdftext/cpdf_linkextract.h +++ b/core/fpdftext/cpdf_linkextract.h @@ -22,23 +22,23 @@ class CPDF_LinkExtract { void ExtractLinks(); size_t CountLinks() const { return m_LinkArray.size(); } - CFX_WideString GetURL(size_t index) const; + WideString GetURL(size_t index) const; std::vector<CFX_FloatRect> GetRects(size_t index) const; protected: void ParseLink(); - bool CheckWebLink(CFX_WideString* str, int32_t* nStart, int32_t* nCount); - bool CheckMailLink(CFX_WideString* str); + bool CheckWebLink(WideString* str, int32_t* nStart, int32_t* nCount); + bool CheckMailLink(WideString* str); private: struct Link { int m_Start; int m_Count; - CFX_WideString m_strUrl; + WideString m_strUrl; }; CFX_UnownedPtr<const CPDF_TextPage> const m_pTextPage; - CFX_WideString m_strPageText; + WideString m_strPageText; std::vector<Link> m_LinkArray; }; diff --git a/core/fpdftext/cpdf_linkextract_unittest.cpp b/core/fpdftext/cpdf_linkextract_unittest.cpp index efeb53ebe2..30438e6c10 100644 --- a/core/fpdftext/cpdf_linkextract_unittest.cpp +++ b/core/fpdftext/cpdf_linkextract_unittest.cpp @@ -32,7 +32,7 @@ TEST(CPDF_LinkExtractTest, CheckMailLink) { }; for (size_t i = 0; i < FX_ArraySize(invalid_strs); ++i) { const wchar_t* const input = invalid_strs[i]; - CFX_WideString text_str(input); + WideString text_str(input); EXPECT_FALSE(extractor.CheckMailLink(&text_str)) << input; } @@ -53,8 +53,8 @@ TEST(CPDF_LinkExtractTest, CheckMailLink) { }; for (size_t i = 0; i < FX_ArraySize(valid_strs); ++i) { const wchar_t* const input = valid_strs[i][0]; - CFX_WideString text_str(input); - CFX_WideString expected_str(L"mailto:"); + WideString text_str(input); + WideString expected_str(L"mailto:"); expected_str += valid_strs[i][1]; EXPECT_TRUE(extractor.CheckMailLink(&text_str)) << input; EXPECT_STREQ(expected_str.c_str(), text_str.c_str()); @@ -80,7 +80,7 @@ TEST(CPDF_LinkExtractTest, CheckWebLink) { const int32_t DEFAULT_VALUE = -42; for (size_t i = 0; i < FX_ArraySize(invalid_cases); ++i) { const wchar_t* const input = invalid_cases[i]; - CFX_WideString text_str(input); + WideString text_str(input); int32_t start_offset = DEFAULT_VALUE; int32_t count = DEFAULT_VALUE; EXPECT_FALSE(extractor.CheckWebLink(&text_str, &start_offset, &count)) @@ -175,7 +175,7 @@ TEST(CPDF_LinkExtractTest, CheckWebLink) { }; for (size_t i = 0; i < FX_ArraySize(valid_cases); ++i) { const wchar_t* const input = valid_cases[i].input_string; - CFX_WideString text_str(input); + WideString text_str(input); int32_t start_offset = DEFAULT_VALUE; int32_t count = DEFAULT_VALUE; EXPECT_TRUE(extractor.CheckWebLink(&text_str, &start_offset, &count)) diff --git a/core/fpdftext/cpdf_textpage.cpp b/core/fpdftext/cpdf_textpage.cpp index ee1d51bfd8..f73793cd72 100644 --- a/core/fpdftext/cpdf_textpage.cpp +++ b/core/fpdftext/cpdf_textpage.cpp @@ -348,14 +348,14 @@ int CPDF_TextPage::GetIndexAtPos(const CFX_PointF& point, return pos < pdfium::CollectionSize<int>(m_CharList) ? pos : NearPos; } -CFX_WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { +WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { if (!m_bIsParsed) - return CFX_WideString(); + return WideString(); float posy = 0; bool IsContainPreChar = false; bool IsAddLineFeed = false; - CFX_WideString strText; + WideString strText; for (const auto& charinfo : m_CharList) { if (IsRectIntersect(rect, charinfo.m_CharBox)) { if (fabs(posy - charinfo.m_Origin.y) > 0 && !IsContainPreChar && @@ -436,7 +436,7 @@ void CPDF_TextPage::CheckMarkedContentObject(int32_t& start, } } -CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { +WideString CPDF_TextPage::GetPageText(int start, int nCount) const { if (!m_bIsParsed || nCount == 0) return L""; @@ -445,8 +445,8 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { if (nCount == -1) { nCount = pdfium::CollectionSize<int>(m_CharList) - start; - CFX_WideStringC wsTextBuf = m_TextBuf.AsStringC(); - return CFX_WideString(wsTextBuf.Right(wsTextBuf.GetLength() - start)); + WideStringView wsTextBuf = m_TextBuf.AsStringView(); + return WideString(wsTextBuf.Right(wsTextBuf.GetLength() - start)); } if (nCount <= 0 || m_CharList.empty()) return L""; @@ -478,7 +478,7 @@ CFX_WideString CPDF_TextPage::GetPageText(int start, int nCount) const { nCount = start + nCount - nCountOffset - startindex; if (nCount <= 0) return L""; - return CFX_WideString(m_TextBuf.AsStringC().Mid(startindex, nCount)); + return WideString(m_TextBuf.AsStringView().Mid(startindex, nCount)); } int CPDF_TextPage::CountRects(int start, int nCount) { @@ -630,7 +630,7 @@ int CPDF_TextPage::GetCharWidth(uint32_t charCode, CPDF_Font* pFont) const { if (int w = pFont->GetCharWidthF(charCode)) return w; - CFX_ByteString str; + ByteString str; pFont->AppendChar(&str, charCode); if (int w = pFont->GetStringWidth(str.c_str(), 1)) return w; @@ -702,7 +702,7 @@ void CPDF_TextPage::CloseTempLine() { if (m_TempCharList.empty()) return; - CFX_WideString str = m_TempTextBuf.MakeString(); + WideString str = m_TempTextBuf.MakeString(); bool bPrevSpace = false; for (FX_STRSIZE i = 0; i < str.GetLength(); i++) { if (str[i] != ' ') { @@ -814,7 +814,7 @@ FPDFText_MarkedContent CPDF_TextPage::PreMarkedContent(PDFTEXT_Obj Obj) { if (nContentMark < 1) return FPDFText_MarkedContent::Pass; - CFX_WideString actText; + WideString actText; bool bExist = false; CPDF_Dictionary* pDict = nullptr; int n = 0; @@ -876,7 +876,7 @@ void CPDF_TextPage::ProcessMarkedContent(PDFTEXT_Obj Obj) { if (nContentMark < 1) return; - CFX_WideString actText; + WideString actText; for (int n = 0; n < nContentMark; n++) { const CPDF_ContentMarkItem& item = pTextObj->m_ContentMark.GetItem(n); CPDF_Dictionary* pDict = item.GetParam(); @@ -941,13 +941,13 @@ void CPDF_TextPage::SwapTempTextBuf(int32_t iCharListStartAppend, bool CPDF_TextPage::IsRightToLeft(const CPDF_TextObject* pTextObj, const CPDF_Font* pFont, int nItems) const { - CFX_WideString str; + WideString str; for (int32_t i = 0; i < nItems; i++) { CPDF_TextObjectItem item; pTextObj->GetItemInfo(i, &item); if (item.m_CharCode == static_cast<uint32_t>(-1)) continue; - CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); + WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); wchar_t wChar = !wstrItem.IsEmpty() ? wstrItem[0] : 0; if (wChar == 0) wChar = item.m_CharCode; @@ -1004,7 +1004,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { if (pTextObj->CountChars() == 1) { CPDF_TextObjectItem item; pTextObj->GetCharInfo(0, &item); - CFX_WideString wstrItem = + WideString wstrItem = pTextObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); if (wstrItem.IsEmpty()) wstrItem += (wchar_t)item.m_CharCode; @@ -1013,7 +1013,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { return; } while (m_TempTextBuf.GetSize() > 0 && - m_TempTextBuf.AsStringC()[m_TempTextBuf.GetLength() - 1] == + m_TempTextBuf.AsStringView()[m_TempTextBuf.GetLength() - 1] == 0x20) { m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); m_TempCharList.pop_back(); @@ -1053,9 +1053,9 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { PAGECHAR_INFO charinfo; pTextObj->GetItemInfo(i, &item); if (item.m_CharCode == static_cast<uint32_t>(-1)) { - CFX_WideString str = m_TempTextBuf.MakeString(); + WideString str = m_TempTextBuf.MakeString(); if (str.IsEmpty()) - str = m_TextBuf.AsStringC(); + str = m_TextBuf.AsStringView(); if (str.IsEmpty() || str[str.GetLength() - 1] == TEXT_SPACE_CHAR) continue; @@ -1106,7 +1106,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { continue; } spacing = 0; - CFX_WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); + WideString wstrItem = pFont->UnicodeFromCharCode(item.m_CharCode); bool bNoUnicode = false; if (wstrItem.IsEmpty() && item.m_CharCode) { wstrItem += static_cast<wchar_t>(item.m_CharCode); @@ -1177,7 +1177,7 @@ void CPDF_TextPage::ProcessTextObject(PDFTEXT_Obj Obj) { m_TempCharList.push_back(charinfo); } } else if (i == 0) { - CFX_WideString str = m_TempTextBuf.MakeString(); + WideString str = m_TempTextBuf.MakeString(); if (!str.IsEmpty() && str[str.GetLength() - 1] == TEXT_SPACE_CHAR) { m_TempTextBuf.Delete(m_TempTextBuf.GetLength() - 1, 1); m_TempCharList.pop_back(); @@ -1220,11 +1220,11 @@ CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode( } bool CPDF_TextPage::IsHyphen(wchar_t curChar) const { - CFX_WideStringC curText; + WideStringView curText; if (!m_TempTextBuf.IsEmpty()) - curText = m_TempTextBuf.AsStringC(); + curText = m_TempTextBuf.AsStringView(); else if (!m_TextBuf.IsEmpty()) - curText = m_TextBuf.AsStringC(); + curText = m_TextBuf.AsStringView(); else return false; @@ -1267,8 +1267,7 @@ CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject( int nItem = m_pPreTextObj->CountItems(); m_pPreTextObj->GetItemInfo(nItem - 1, &PrevItem); pObj->GetItemInfo(0, &item); - CFX_WideString wstrItem = - pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); + WideString wstrItem = pObj->GetFont()->UnicodeFromCharCode(item.m_CharCode); if (wstrItem.IsEmpty()) wstrItem += static_cast<wchar_t>(item.m_CharCode); wchar_t curChar = wstrItem[0]; @@ -1356,7 +1355,7 @@ CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject( IsHyphen(curChar)) { return GenerateCharacter::Hyphen; } - CFX_WideString PrevStr = + WideString PrevStr = m_pPreTextObj->GetFont()->UnicodeFromCharCode(PrevItem.m_CharCode); if (PrevStr.IsEmpty()) return GenerateCharacter::None; diff --git a/core/fpdftext/cpdf_textpage.h b/core/fpdftext/cpdf_textpage.h index 41892ea97c..ef55ad05cb 100644 --- a/core/fpdftext/cpdf_textpage.h +++ b/core/fpdftext/cpdf_textpage.h @@ -106,8 +106,8 @@ class CPDF_TextPage { void GetCharInfo(int index, FPDF_CHAR_INFO* info) const; std::vector<CFX_FloatRect> GetRectArray(int start, int nCount) const; int GetIndexAtPos(const CFX_PointF& point, const CFX_SizeF& tolerance) const; - CFX_WideString GetTextByRect(const CFX_FloatRect& rect) const; - CFX_WideString GetPageText(int start = 0, int nCount = -1) const; + WideString GetTextByRect(const CFX_FloatRect& rect) const; + WideString GetPageText(int start = 0, int nCount = -1) const; int CountRects(int start, int nCount); void GetRect(int rectIndex, float& left, diff --git a/core/fpdftext/cpdf_textpagefind.cpp b/core/fpdftext/cpdf_textpagefind.cpp index 3678e42515..f00b8a9f4d 100644 --- a/core/fpdftext/cpdf_textpagefind.cpp +++ b/core/fpdftext/cpdf_textpagefind.cpp @@ -79,14 +79,14 @@ int CPDF_TextPageFind::GetCharIndex(int index) const { return m_pTextPage->CharIndexFromTextIndex(index); } -bool CPDF_TextPageFind::FindFirst(const CFX_WideString& findwhat, +bool CPDF_TextPageFind::FindFirst(const WideString& findwhat, int flags, pdfium::Optional<FX_STRSIZE> startPos) { if (!m_pTextPage) return false; if (m_strText.IsEmpty() || m_bMatchCase != (flags & FPDFTEXT_MATCHCASE)) m_strText = m_pTextPage->GetPageText(); - CFX_WideString findwhatStr = findwhat; + WideString findwhatStr = findwhat; m_findWhat = findwhatStr; m_flags = flags; m_bMatchCase = flags & FPDFTEXT_MATCHCASE; @@ -147,7 +147,7 @@ bool CPDF_TextPageFind::FindNext() { FX_STRSIZE nStartPos = m_findNextStart.value(); bool bSpaceStart = false; for (int iWord = 0; iWord < nCount; iWord++) { - CFX_WideString csWord = m_csFindWhatArray[iWord]; + WideString csWord = m_csFindWhatArray[iWord]; if (csWord.IsEmpty()) { if (iWord == nCount - 1) { wchar_t strInsert = m_strText[nStartPos]; @@ -175,7 +175,7 @@ bool CPDF_TextPageFind::FindNext() { if (iWord != 0 && !bSpaceStart) { FX_STRSIZE PreResEndPos = nStartPos; int curChar = csWord[0]; - CFX_WideString lastWord = m_csFindWhatArray[iWord - 1]; + WideString lastWord = m_csFindWhatArray[iWord - 1]; int lastChar = lastWord[lastWord.GetLength() - 1]; if (nStartPos == nResultPos.value() && !(IsIgnoreSpaceCharacter(lastChar) || @@ -275,12 +275,12 @@ bool CPDF_TextPageFind::FindPrev() { return m_IsFind; } -void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { +void CPDF_TextPageFind::ExtractFindWhat(const WideString& findwhat) { if (findwhat.IsEmpty()) return; int index = 0; while (1) { - CFX_WideString csWord = TEXT_EMPTY; + WideString csWord = TEXT_EMPTY; int ret = ExtractSubString(csWord, findwhat.c_str(), index, TEXT_SPACE_CHAR); if (csWord.IsEmpty()) { @@ -294,7 +294,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { } FX_STRSIZE pos = 0; while (pos < csWord.GetLength()) { - CFX_WideString curStr = csWord.Mid(pos, 1); + WideString curStr = csWord.Mid(pos, 1); wchar_t curChar = csWord[pos]; if (IsIgnoreSpaceCharacter(curChar)) { if (pos > 0 && curChar == 0x2019) { @@ -320,7 +320,7 @@ void CPDF_TextPageFind::ExtractFindWhat(const CFX_WideString& findwhat) { } } -bool CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, +bool CPDF_TextPageFind::IsMatchWholeWord(const WideString& csPageText, FX_STRSIZE startPos, FX_STRSIZE endPos) { if (startPos > endPos) @@ -360,7 +360,7 @@ bool CPDF_TextPageFind::IsMatchWholeWord(const CFX_WideString& csPageText, return true; } -bool CPDF_TextPageFind::ExtractSubString(CFX_WideString& rString, +bool CPDF_TextPageFind::ExtractSubString(WideString& rString, const wchar_t* lpszFullString, int iSubString, wchar_t chSep) { diff --git a/core/fpdftext/cpdf_textpagefind.h b/core/fpdftext/cpdf_textpagefind.h index cf8d3d1702..f9a28a0b0a 100644 --- a/core/fpdftext/cpdf_textpagefind.h +++ b/core/fpdftext/cpdf_textpagefind.h @@ -22,7 +22,7 @@ class CPDF_TextPageFind { explicit CPDF_TextPageFind(const CPDF_TextPage* pTextPage); ~CPDF_TextPageFind(); - bool FindFirst(const CFX_WideString& findwhat, + bool FindFirst(const WideString& findwhat, int flags, pdfium::Optional<FX_STRSIZE> startPos); bool FindNext(); @@ -31,11 +31,11 @@ class CPDF_TextPageFind { int GetMatchedCount() const; protected: - void ExtractFindWhat(const CFX_WideString& findwhat); - bool IsMatchWholeWord(const CFX_WideString& csPageText, + void ExtractFindWhat(const WideString& findwhat); + bool IsMatchWholeWord(const WideString& csPageText, FX_STRSIZE startPos, FX_STRSIZE endPos); - bool ExtractSubString(CFX_WideString& rString, + bool ExtractSubString(WideString& rString, const wchar_t* lpszFullString, int iSubString, wchar_t chSep); @@ -44,10 +44,10 @@ class CPDF_TextPageFind { private: std::vector<uint16_t> m_CharIndex; CFX_UnownedPtr<const CPDF_TextPage> m_pTextPage; - CFX_WideString m_strText; - CFX_WideString m_findWhat; + WideString m_strText; + WideString m_findWhat; int m_flags; - std::vector<CFX_WideString> m_csFindWhatArray; + std::vector<WideString> m_csFindWhatArray; pdfium::Optional<FX_STRSIZE> m_findNextStart; pdfium::Optional<FX_STRSIZE> m_findPreStart; bool m_bMatchCase; |